r285493 - [X86][AVX512][Clang][Intrinsics][reduce] Adding missing reduce (max|min) intrinsics to Clang.

Sanjay Patel via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 3 14:48:05 PST 2017


Hi Michael,

Please change the test file to not use -O2. We shouldn't be testing the
optimizer from a clang regression test.

This caused bot breakage after:
https://reviews.llvm.org/rL294049

...so I just hacked it to get things back to green quickly:
https://reviews.llvm.org/rL294058
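
For reference, a frontend-only version of such a test would drop the -O2 and
the opt pipe and match only the IR that clang itself emits. The RUN line and
CHECK patterns below are just a sketch of that shape (the exact lines in
rL294058 may differ):

// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin \
// RUN:   -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s

#include <immintrin.h>

long long test_mm512_reduce_max_epi64(__m512i __W) {
  // Without the optimizer in the pipeline, the checks can only key off the
  // instructions CodeGen emits directly (the always_inline wrappers are still
  // inlined at -O0), for example:
  // CHECK-LABEL: test_mm512_reduce_max_epi64
  // CHECK: shufflevector <8 x i64>
  return _mm512_reduce_max_epi64(__W);
}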

On Sat, Oct 29, 2016 at 4:29 AM, Michael Zuckerman via cfe-commits <
cfe-commits at lists.llvm.org> wrote:

> Author: mzuckerm
> Date: Sat Oct 29 05:29:20 2016
> New Revision: 285493
>
> URL: http://llvm.org/viewvc/llvm-project?rev=285493&view=rev
> Log:
> [X86][AVX512][Clang][Intrinsics][reduce] Adding missing reduce (max|min) intrinsics to Clang.
>
> After LGTM and Check-all
>
> Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
> outputs. This class of vector operation forms the basis of many scientific
> computations. In vector-reduction arithmetic, the evaluation of f is
> independent of the order of the input elements of V.
>
> Reviewer: 1. craig.topper
>           2. igorb
>
> Differential Revision: https://reviews.llvm.org/D25988
>
>
> Added:
>     cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c
> Modified:
>     cfe/trunk/lib/Headers/avx512fintrin.h
>
> Modified: cfe/trunk/lib/Headers/avx512fintrin.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=285493&r1=285492&r2=285493&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Headers/avx512fintrin.h (original)
> +++ cfe/trunk/lib/Headers/avx512fintrin.h Sat Oct 29 05:29:20 2016
> @@ -9904,6 +9904,286 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M,
>    _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
>  }
>
> +// A bisection method is used: at each step, the vector from the previous step
> +// is partitioned in half, and the operation is performed on its two halves.
> +// This takes log2(n) steps where n is the number of elements in the vector.
> +// This macro uses only intrinsics from the AVX512F feature.
> +
> +// Vec512 - Vector with size of 512.
> +// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd}, for
> +//              example: _mm512_max_epi64
> +// T1 - Can get 'i' for int and 'd' for double. [__m512{i|d}]
> +// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
> +
> +#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
> +        Vec512 = _mm512_##IntrinName(                                         \
> +                                (__m512##T1)__builtin_shufflevector(          \
> +                                                (__v8d##T2)Vec512,            \
> +                                                (__v8d##T2)Vec512,            \
> +                                                 0, 1, 2, 3, -1, -1, -1, -1), \
> +                                (__m512##T1)__builtin_shufflevector(          \
> +                                                (__v8d##T2)Vec512,            \
> +                                                (__v8d##T2)Vec512,            \
> +                                                 4, 5, 6, 7, -1, -1, -1, -1)); \
> +        Vec512 = _mm512_##IntrinName(                                         \
> +                                (__m512##T1)__builtin_shufflevector(          \
> +                                                (__v8d##T2)Vec512,            \
> +                                                (__v8d##T2)Vec512,            \
> +                                                 0, 1, -1, -1, -1, -1, -1, -1), \
> +                                (__m512##T1)__builtin_shufflevector(          \
> +                                                (__v8d##T2)Vec512,            \
> +                                                (__v8d##T2)Vec512,            \
> +                                                 2, 3, -1, -1, -1, -1, -1, -1)); \
> +        Vec512 = _mm512_##IntrinName(                                         \
> +                                (__m512##T1)__builtin_shufflevector(          \
> +                                                (__v8d##T2)Vec512,            \
> +                                                (__v8d##T2)Vec512,            \
> +                                                0, -1, -1, -1, -1, -1, -1, -1), \
> +                                (__m512##T1)__builtin_shufflevector(          \
> +                                                (__v8d##T2)Vec512,            \
> +                                                (__v8d##T2)Vec512,            \
> +                                                1, -1, -1, -1, -1, -1, -1, -1)); \
> +    return Vec512[0];                                                         \
> +  })
> +
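
To make the expansion easier to follow, here is a plain scalar model of what
one instantiation of the macro above computes; the names (reduce64_model, op)
are illustrative only and are not part of the patch:

/* Scalar model of the 64-bit bisection reduction: three halving steps over
   eight lanes, with 'op' standing in for the substituted max/min operation. */
static long long reduce64_model(const long long v[8],
                                long long (*op)(long long, long long)) {
  long long tmp[8];
  for (int i = 0; i < 8; ++i) tmp[i] = v[i];
  for (int i = 0; i < 4; ++i) tmp[i] = op(tmp[i], tmp[i + 4]); /* lanes 0-3 vs 4-7 */
  for (int i = 0; i < 2; ++i) tmp[i] = op(tmp[i], tmp[i + 2]); /* lanes 0-1 vs 2-3 */
  tmp[0] = op(tmp[0], tmp[1]);                                 /* lane 0 vs lane 1 */
  return tmp[0];
}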
> +static __inline__ long long __DEFAULT_FN_ATTRS
> +_mm512_reduce_max_epi64(__m512i __V) {
> +  _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
> +}
> +
> +static __inline__ unsigned long long __DEFAULT_FN_ATTRS
> +_mm512_reduce_max_epu64(__m512i __V) {
> +  _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
> +}
> +
> +static __inline__ double __DEFAULT_FN_ATTRS
> +_mm512_reduce_max_pd(__m512d __V) {
> +  _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
> +}
> +
> +static __inline__ long long __DEFAULT_FN_ATTRS
> +_mm512_reduce_min_epi64(__m512i __V) {
> +  _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
> +}
> +
> +static __inline__ unsigned long long __DEFAULT_FN_ATTRS
> +_mm512_reduce_min_epu64(__m512i __V) {
> +  _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
> +}
> +
> +static __inline__ double __DEFAULT_FN_ATTRS
> +_mm512_reduce_min_pd(__m512d __V) {
> +  _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
> +}
> +
> +// Vec512 - Vector with size 512.
> +// Vec512Neutral - A 512-bit vector with all elements set to the identity
> +//                 element of the operation:
> +//                   {max_epi, 0x8000000000000000}
> +//                   {max_epu, 0x0000000000000000}
> +//                   {max_pd,  0xFFF0000000000000}
> +//                   {min_epi, 0x7FFFFFFFFFFFFFFF}
> +//                   {min_epu, 0xFFFFFFFFFFFFFFFF}
> +//                   {min_pd,  0x7FF0000000000000}
> +//
> +// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd}, for
> +//              example: _mm512_max_epi64
> +// T1 - Can get 'i' for int and 'd' for double. [__m512{i|d}]
> +// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
> +// T3 - Can get 'q' for q word and 'pd' for packed double.
> +//      [__builtin_ia32_select{q|pd}_512]
> +// Mask - Intrinsic Mask
> +
> +#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
> +                                        T2, T3, Mask)                          \
> +  __extension__({                                                              \
> +    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
> +                             (__mmask8)Mask,                                   \
> +                             (__v8d##T2)Vec512,                                \
> +                             (__v8d##T2)Vec512Neutral);                        \
> +    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2);                    \
> +  })
> +
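
The masked variants below only add one step in front of the same reduction:
lanes cleared in the mask are overwritten with the operation's identity
element, so they cannot affect the result. A scalar model of that
select-then-reduce pattern (illustrative names, not part of the patch):

/* Scalar model of the masked reduction: apply the mask first, then reduce. */
static long long mask_reduce64_model(unsigned char mask, const long long v[8],
                                     long long identity,
                                     long long (*op)(long long, long long)) {
  long long tmp[8];
  for (int i = 0; i < 8; ++i)
    tmp[i] = (mask & (1u << i)) ? v[i] : identity; /* the __builtin_ia32_select step */
  long long r = tmp[0];
  for (int i = 1; i < 8; ++i) r = op(r, tmp[i]);   /* then the usual reduction */
  return r;
}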
> +static __inline__ long long __DEFAULT_FN_ATTRS
> +_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
> +  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
> +                                  max_epi64, i, i, q, __M);
> +}
> +
> +static __inline__ unsigned long long __DEFAULT_FN_ATTRS
> +_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
> +  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
> +                                  max_epu64, i, i, q, __M);
> +}
> +
> +static __inline__ double __DEFAULT_FN_ATTRS
> +_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
> +  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(0xFFF0000000000000),
> +                                  max_pd, d, f, pd, __M);
> +}
> +
> +static __inline__ long long __DEFAULT_FN_ATTRS
> +_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
> +  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
> +                                  min_epi64, i, i, q, __M);
> +}
> +
> +static __inline__ unsigned long long __DEFAULT_FN_ATTRS
> +_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
> +  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
> +                                  min_epu64, i, i, q, __M);
> +}
> +
> +static __inline__ double __DEFAULT_FN_ATTRS
> +_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
> +  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(0x7FF0000000000000),
> +                                  min_pd, d, f, pd, __M);
> +}
> +
> +// Vec512 - Vector with size 512.
> +// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps}, for
> +//              example: _mm512_max_epi32
> +// T1 - Can get 'i' for int and '' (empty) for float. [__m512{i|}]
> +// T2 - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
> +
> +#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \
> +    Vec512 = _mm512_##IntrinName(                                             \
> +                  (__m512##T1)__builtin_shufflevector(                        \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  0, 1, 2, 3, 4, 5, 6, 7,                     \
> +                                  -1, -1, -1, -1, -1, -1, -1, -1),            \
> +                  (__m512##T1)__builtin_shufflevector(                        \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  8, 9, 10, 11, 12, 13, 14, 15,               \
> +                                  -1, -1, -1, -1, -1, -1, -1, -1));           \
> +    Vec512 = _mm512_##IntrinName(                                             \
> +                  (__m512##T1)__builtin_shufflevector(                        \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  0, 1, 2, 3, -1, -1, -1, -1,                 \
> +                                  -1, -1, -1, -1, -1, -1, -1, -1),            \
> +                  (__m512##T1)__builtin_shufflevector(                        \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  4, 5, 6, 7, -1, -1, -1, -1,                 \
> +                                  -1, -1, -1, -1, -1, -1, -1, -1));           \
> +    Vec512 = _mm512_##IntrinName(                                             \
> +                  (__m512##T1)__builtin_shufflevector(                        \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  0, 1, -1, -1, -1, -1, -1, -1,               \
> +                                  -1, -1, -1, -1, -1, -1, -1, -1),            \
> +                  (__m512##T1)__builtin_shufflevector(                        \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  2, 3, -1, -1, -1, -1, -1, -1,               \
> +                                  -1, -1, -1, -1, -1, -1, -1, -1));           \
> +    Vec512 = _mm512_##IntrinName(                                             \
> +                  (__m512##T1)__builtin_shufflevector(                        \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  0,  -1, -1, -1, -1, -1, -1, -1,             \
> +                                  -1, -1, -1, -1, -1, -1, -1, -1),            \
> +                  (__m512##T1)__builtin_shufflevector(                        \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  (__v16s##T2)Vec512,                         \
> +                                  1, -1, -1, -1, -1, -1, -1, -1,              \
> +                                  -1, -1, -1, -1, -1, -1, -1, -1));           \
> +    return Vec512[0];                                                         \
> +  })
> +
> +static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) {
> +  _mm512_reduce_maxMin_32bit(a, max_epi32, i, i);
> +}
> +
> +static __inline__ unsigned int __DEFAULT_FN_ATTRS
> +_mm512_reduce_max_epu32(__m512i a) {
> +  _mm512_reduce_maxMin_32bit(a, max_epu32, i, i);
> +}
> +
> +static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) {
> +  _mm512_reduce_maxMin_32bit(a, max_ps, , f);
> +}
> +
> +static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) {
> +  _mm512_reduce_maxMin_32bit(a, min_epi32, i, i);
> +}
> +
> +static __inline__ unsigned int __DEFAULT_FN_ATTRS
> +_mm512_reduce_min_epu32(__m512i a) {
> +  _mm512_reduce_maxMin_32bit(a, min_epu32, i, i);
> +}
> +
> +static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) {
> +  _mm512_reduce_maxMin_32bit(a, min_ps, , f);
> +}
> +
> +// Vec512 - Vector with size 512.
> +// Vec512Neutral - A 512-bit vector with all elements set to the identity
> +//                 element of the operation:
> +//                   {max_epi, 0x80000000}
> +//                   {max_epu, 0x00000000}
> +//                   {max_ps,  0xFF800000}
> +//                   {min_epi, 0x7FFFFFFF}
> +//                   {min_epu, 0xFFFFFFFF}
> +//                   {min_ps,  0x7F800000}
> +//
> +// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps}, for
> +//              example: _mm512_max_epi32
> +// T1 - Can get 'i' for int and '' (empty) for float. [__m512{i|}]
> +// T2 - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
> +// T3 - Can get 'd' for d word and 'ps' for packed single.
> +//      [__builtin_ia32_select{d|ps}_512]
> +// Mask - Intrinsic Mask
> +
> +#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
> +                                        T2, T3, Mask)                          \
> +  __extension__({                                                              \
> +    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
> +                                        (__mmask16)Mask,                       \
> +                                        (__v16s##T2)Vec512,                    \
> +                                        (__v16s##T2)Vec512Neutral);            \
> +    _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2);                    \
> +  })
> +
> +static __inline__ int __DEFAULT_FN_ATTRS
> +_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
> +  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
> +                                  i, i, d, __M);
> +}
> +
> +static __inline__ unsigned int __DEFAULT_FN_ATTRS
> +_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
> +  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
> +                                  i, i, d, __M);
> +}
> +
> +static __inline__ float __DEFAULT_FN_ATTRS
> +_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
> +  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(0xFF800000), max_ps, , f,
> +                                  ps, __M);
> +}
> +
> +static __inline__ int __DEFAULT_FN_ATTRS
> +_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
> +  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
> +                                  i, i, d, __M);
> +}
> +
> +static __inline__ unsigned int __DEFAULT_FN_ATTRS
> +_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
> +  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
> +                                  i, i, d, __M);
> +}
> +
> +static __inline__ float __DEFAULT_FN_ATTRS
> +_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
> +  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(0x7F800000), min_ps, , f,
> +                                  ps, __M);
> +}
> +
>  #undef __DEFAULT_FN_ATTRS
>
>  #endif // __AVX512FINTRIN_H
>
> Added: cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c?rev=285493&view=auto
> ==============================================================================
> --- cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c (added)
> +++ cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c Sat Oct 29 05:29:20 2016
> @@ -0,0 +1,437 @@
> +// RUN: %clang_cc1 -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror |opt -instnamer -S |FileCheck %s
> +
> +#include <immintrin.h>
> +
> +long long test_mm512_reduce_max_epi64(__m512i __W){
> +  // CHECK: %shuffle1.i = shufflevector <8 x i64> %__W, <8 x i64> undef,
> <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32
> undef>
> +  // CHECK: %tmp = icmp slt <8 x i64> %shuffle1.i, %__W
> +  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64>
> %shuffle1.i
> +  // CHECK: %shuffle3.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef,
> <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> +  // CHECK: %tmp2 = icmp sgt <8 x i64> %tmp1, %shuffle3.i
> +  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64>
> %shuffle3.i
> +  // CHECK: %shuffle6.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef,
> <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp4 = icmp sgt <8 x i64> %tmp3, %shuffle6.i
> +  // CHECK: %.elt.i = extractelement <8 x i1> %tmp4, i32 0
> +  // CHECK: %.elt20.i = extractelement <8 x i64> %tmp3, i32 0
> +  // CHECK: %shuffle6.elt.i = extractelement <8 x i64> %tmp3, i32 1
> +  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt20.i, i64
> %shuffle6.elt.i
> +  // CHECK: ret i64 %vecext.i
> +  return _mm512_reduce_max_epi64(__W);
> +}
> +
> +unsigned long long test_mm512_reduce_max_epu64(__m512i __W){
> +  // CHECK: %shuffle1.i = shufflevector <8 x i64> %__W, <8 x i64> undef,
> <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32
> undef>
> +  // CHECK: %tmp = icmp ult <8 x i64> %shuffle1.i, %__W
> +  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64>
> %shuffle1.i
> +  // CHECK: %shuffle3.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef,
> <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> +  // CHECK: %tmp2 = icmp ugt <8 x i64> %tmp1, %shuffle3.i
> +  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64>
> %shuffle3.i
> +  // CHECK: %shuffle6.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef,
> <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp4 = icmp ugt <8 x i64> %tmp3, %shuffle6.i
> +  // CHECK: %.elt.i = extractelement <8 x i1> %tmp4, i32 0
> +  // CHECK: %.elt20.i = extractelement <8 x i64> %tmp3, i32 0
> +  // CHECK: %shuffle6.elt.i = extractelement <8 x i64> %tmp3, i32 1
> +  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt20.i, i64
> %shuffle6.elt.i
> +  // CHECK: ret i64 %vecext.i
> +  return _mm512_reduce_max_epu64(__W);
> +}
> +
> +double test_mm512_reduce_max_pd(__m512d __W){
> +  // CHECK: %shuffle1.i = shufflevector <8 x double> %__W, <8 x double>
> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef>
> +  // CHECK: %tmp = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8
> x double> %__W, <8 x double> %shuffle1.i, <8 x double> zeroinitializer, i8
> -1, i32 4) #3
> +  // CHECK: %shuffle3.i = shufflevector <8 x double> %tmp, <8 x double>
> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp1 = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8
> x double> %tmp, <8 x double> %shuffle3.i, <8 x double> zeroinitializer, i8
> -1, i32 4) #3
> +  // CHECK: %shuffle6.i = shufflevector <8 x double> %tmp1, <8 x double>
> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef>
> +  // CHECK: %tmp2 = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8
> x double> %tmp1, <8 x double> %shuffle6.i, <8 x double> zeroinitializer, i8
> -1, i32 4) #3
> +  // CHECK: %vecext.i = extractelement <8 x double> %tmp2, i32 0
> +  // CHECK: ret double %vecext.i
> +  return _mm512_reduce_max_pd(__W);
> +}
> +
> +long long test_mm512_reduce_min_epi64(__m512i __W){
> +  // CHECK: %shuffle1.i = shufflevector <8 x i64> %__W, <8 x i64> undef,
> <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32
> undef>
> +  // CHECK: %tmp = icmp slt <8 x i64> %shuffle1.i, %__W
> +  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64>
> %shuffle1.i
> +  // CHECK: %shuffle3.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef,
> <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> +  // CHECK: %tmp2 = icmp sgt <8 x i64> %tmp1, %shuffle3.i
> +  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64>
> %shuffle3.i
> +  // CHECK: %shuffle6.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef,
> <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp4 = icmp sgt <8 x i64> %tmp3, %shuffle6.i
> +  // CHECK: %.elt.i = extractelement <8 x i1> %tmp4, i32 0
> +  // CHECK: %.elt20.i = extractelement <8 x i64> %tmp3, i32 0
> +  // CHECK: %shuffle6.elt.i = extractelement <8 x i64> %tmp3, i32 1
> +  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt20.i, i64
> %shuffle6.elt.i
> +  // CHECK: ret i64 %vecext.i
> +  return _mm512_reduce_max_epi64(__W);
> +}
> +
> +unsigned long long test_mm512_reduce_min_epu64(__m512i __W){
> +  // CHECK: %shuffle1.i = shufflevector <8 x i64> %__W, <8 x i64> undef,
> <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32
> undef>
> +  // CHECK: %tmp = icmp ult <8 x i64> %shuffle1.i, %__W
> +  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64>
> %shuffle1.i
> +  // CHECK: %shuffle3.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef,
> <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> +  // CHECK: %tmp2 = icmp ugt <8 x i64> %tmp1, %shuffle3.i
> +  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64>
> %shuffle3.i
> +  // CHECK: %shuffle6.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef,
> <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp4 = icmp ugt <8 x i64> %tmp3, %shuffle6.i
> +  // CHECK: %.elt.i = extractelement <8 x i1> %tmp4, i32 0
> +  // CHECK: %.elt20.i = extractelement <8 x i64> %tmp3, i32 0
> +  // CHECK: %shuffle6.elt.i = extractelement <8 x i64> %tmp3, i32 1
> +  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt20.i, i64
> %shuffle6.elt.i
> +  // CHECK: ret i64 %vecext.i
> +  return _mm512_reduce_max_epu64(__W);
> +}
> +
> +double test_mm512_reduce_min_pd(__m512d __W){
> +  // CHECK: %shuffle1.i = shufflevector <8 x double> %__W, <8 x double>
> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef>
> +  // CHECK: %tmp = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8
> x double> %__W, <8 x double> %shuffle1.i, <8 x double> zeroinitializer, i8
> -1, i32 4) #3
> +  // CHECK: %shuffle3.i = shufflevector <8 x double> %tmp, <8 x double>
> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp1 = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8
> x double> %tmp, <8 x double> %shuffle3.i, <8 x double> zeroinitializer, i8
> -1, i32 4) #3
> +  // CHECK: %shuffle6.i = shufflevector <8 x double> %tmp1, <8 x double>
> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef>
> +  // CHECK: %tmp2 = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8
> x double> %tmp1, <8 x double> %shuffle6.i, <8 x double> zeroinitializer, i8
> -1, i32 4) #3
> +  // CHECK: %vecext.i = extractelement <8 x double> %tmp2, i32 0
> +  // CHECK: ret double %vecext.i
> +  return _mm512_reduce_min_pd(__W);
> +}
> +
> +long long test_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __W){
> +  // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
> +  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64> <i64
> -9223372036854775808, i64 -9223372036854775808, i64 -9223372036854775808,
> i64 -9223372036854775808, i64 -9223372036854775808, i64
> -9223372036854775808, i64 -9223372036854775808, i64 -9223372036854775808>
> +  // CHECK: %shuffle1.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef,
> <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32
> undef>
> +  // CHECK: %tmp2 = icmp sgt <8 x i64> %tmp1, %shuffle1.i
> +  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64>
> %shuffle1.i
> +  // CHECK: %shuffle4.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef,
> <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> +  // CHECK: %tmp4 = icmp sgt <8 x i64> %tmp3, %shuffle4.i
> +  // CHECK: %tmp5 = select <8 x i1> %tmp4, <8 x i64> %tmp3, <8 x i64>
> %shuffle4.i
> +  // CHECK: %shuffle7.i = shufflevector <8 x i64> %tmp5, <8 x i64> undef,
> <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp6 = icmp sgt <8 x i64> %tmp5, %shuffle7.i
> +  // CHECK: %.elt.i = extractelement <8 x i1> %tmp6, i32 0
> +  // CHECK: %.elt22.i = extractelement <8 x i64> %tmp5, i32 0
> +  // CHECK: %shuffle7.elt.i = extractelement <8 x i64> %tmp5, i32 1
> +  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt22.i, i64
> %shuffle7.elt.i
> +  // CHECK: ret i64 %vecext.i
> +  return _mm512_mask_reduce_max_epi64(__M, __W);
> +}
> +
> +unsigned long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i
> __W){
> +  // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
> +  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64>
> zeroinitializer
> +  // CHECK: %shuffle1.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef,
> <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32
> undef>
> +  // CHECK: %tmp2 = icmp ugt <8 x i64> %tmp1, %shuffle1.i
> +  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64>
> %shuffle1.i
> +  // CHECK: %shuffle4.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef,
> <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> +  // CHECK: %tmp4 = icmp ugt <8 x i64> %tmp3, %shuffle4.i
> +  // CHECK: %tmp5 = select <8 x i1> %tmp4, <8 x i64> %tmp3, <8 x i64>
> %shuffle4.i
> +  // CHECK: %shuffle7.i = shufflevector <8 x i64> %tmp5, <8 x i64> undef,
> <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp6 = icmp ugt <8 x i64> %tmp5, %shuffle7.i
> +  // CHECK: %.elt.i = extractelement <8 x i1> %tmp6, i32 0
> +  // CHECK: %.elt22.i = extractelement <8 x i64> %tmp5, i32 0
> +  // CHECK: %shuffle7.elt.i = extractelement <8 x i64> %tmp5, i32 1
> +  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt22.i, i64
> %shuffle7.elt.i
> +  // CHECK: ret i64 %vecext.i
> +  return _mm512_mask_reduce_max_epu64(__M, __W);
> +}
> +
> +long long test_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __W){
> +  // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
> +  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x double> %__W, <8 x double>
> <double 0x43EFFE0000000000, double 0x43EFFE0000000000, double
> 0x43EFFE0000000000, double 0x43EFFE0000000000, double 0x43EFFE0000000000,
> double 0x43EFFE0000000000, double 0x43EFFE0000000000, double
> 0x43EFFE0000000000>
> +  // CHECK: %shuffle1.i = shufflevector <8 x double> %tmp1, <8 x double>
> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef>
> +  // CHECK: %tmp2 = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8
> x double> %tmp1, <8 x double> %shuffle1.i, <8 x double> zeroinitializer, i8
> -1, i32 4) #3
> +  // CHECK: %shuffle4.i = shufflevector <8 x double> %tmp2, <8 x double>
> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp3 = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8
> x double> %tmp2, <8 x double> %shuffle4.i, <8 x double> zeroinitializer, i8
> -1, i32 4) #3
> +  // CHECK: %shuffle7.i = shufflevector <8 x double> %tmp3, <8 x double>
> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef>
> +  // CHECK: %tmp4 = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8
> x double> %tmp3, <8 x double> %shuffle7.i, <8 x double> zeroinitializer, i8
> -1, i32 4) #3
> +  // CHECK: %vecext.i = extractelement <8 x double> %tmp4, i32 0
> +  // CHECK: %conv = fptosi double %vecext.i to i64
> +  // CHECK: ret i64 %conv
> +  return _mm512_mask_reduce_max_pd(__M, __W);
> +}
> +
> +long long test_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __W){
> +  // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
> +  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64> <i64
> 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807, i64
> 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807, i64
> 9223372036854775807, i64 9223372036854775807>
> +  // CHECK: %shuffle1.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef,
> <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32
> undef>
> +  // CHECK: %tmp2 = icmp slt <8 x i64> %tmp1, %shuffle1.i
> +  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64>
> %shuffle1.i
> +  // CHECK: %shuffle4.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef,
> <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> +  // CHECK: %tmp4 = icmp slt <8 x i64> %tmp3, %shuffle4.i
> +  // CHECK: %tmp5 = select <8 x i1> %tmp4, <8 x i64> %tmp3, <8 x i64>
> %shuffle4.i
> +  // CHECK: %shuffle7.i = shufflevector <8 x i64> %tmp5, <8 x i64> undef,
> <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp6 = icmp slt <8 x i64> %tmp5, %shuffle7.i
> +  // CHECK: %.elt.i = extractelement <8 x i1> %tmp6, i32 0
> +  // CHECK: %.elt22.i = extractelement <8 x i64> %tmp5, i32 0
> +  // CHECK: %shuffle7.elt.i = extractelement <8 x i64> %tmp5, i32 1
> +  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt22.i, i64
> %shuffle7.elt.i
> +  // CHECK: ret i64 %vecext.i
> +  return _mm512_mask_reduce_min_epi64(__M, __W);
> +}
> +
> +long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){
> +  // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
> +  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64>
> zeroinitializer
> +  // CHECK: %shuffle1.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef,
> <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32
> undef>
> +  // CHECK: %tmp2 = icmp ugt <8 x i64> %tmp1, %shuffle1.i
> +  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64>
> %shuffle1.i
> +  // CHECK: %shuffle4.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef,
> <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> +  // CHECK: %tmp4 = icmp ugt <8 x i64> %tmp3, %shuffle4.i
> +  // CHECK: %tmp5 = select <8 x i1> %tmp4, <8 x i64> %tmp3, <8 x i64>
> %shuffle4.i
> +  // CHECK: %shuffle7.i = shufflevector <8 x i64> %tmp5, <8 x i64> undef,
> <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp6 = icmp ugt <8 x i64> %tmp5, %shuffle7.i
> +  // CHECK: %.elt.i = extractelement <8 x i1> %tmp6, i32 0
> +  // CHECK: %.elt22.i = extractelement <8 x i64> %tmp5, i32 0
> +  // CHECK: %shuffle7.elt.i = extractelement <8 x i64> %tmp5, i32 1
> +  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt22.i, i64
> %shuffle7.elt.i
> +  // CHECK: ret i64 %vecext.i
> +  return _mm512_mask_reduce_max_epu64(__M, __W);
> +}
> +
> +double test_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __W){
> +  // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
> +  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x double> %__W, <8 x double>
> <double 0x43DFFC0000000000, double 0x43DFFC0000000000, double
> 0x43DFFC0000000000, double 0x43DFFC0000000000, double 0x43DFFC0000000000,
> double 0x43DFFC0000000000, double 0x43DFFC0000000000, double
> 0x43DFFC0000000000>
> +  // CHECK: %shuffle1.i = shufflevector <8 x double> %tmp1, <8 x double>
> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef>
> +  // CHECK: %tmp2 = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8
> x double> %tmp1, <8 x double> %shuffle1.i, <8 x double> zeroinitializer, i8
> -1, i32 4) #3
> +  // CHECK: %shuffle4.i = shufflevector <8 x double> %tmp2, <8 x double>
> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp3 = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8
> x double> %tmp2, <8 x double> %shuffle4.i, <8 x double> zeroinitializer, i8
> -1, i32 4) #3
> +  // CHECK: %shuffle7.i = shufflevector <8 x double> %tmp3, <8 x double>
> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef>
> +  // CHECK: %tmp4 = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8
> x double> %tmp3, <8 x double> %shuffle7.i, <8 x double> zeroinitializer, i8
> -1, i32 4) #3
> +  // CHECK: %vecext.i = extractelement <8 x double> %tmp4, i32 0
> +  // CHECK: ret double %vecext.i
> +  return _mm512_mask_reduce_min_pd(__M, __W);
> +}
> +
> +int test_mm512_reduce_max_epi32(__m512i __W){
> +  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
> +  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp, <16 x i32>
> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
> i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp1 = icmp sgt <16 x i32> %tmp, %shuffle1.i
> +  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32>
> %shuffle1.i
> +  // CHECK: %shuffle3.i = shufflevector <16 x i32> %tmp2, <16 x i32>
> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp3 = icmp sgt <16 x i32> %tmp2, %shuffle3.i
> +  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32>
> %shuffle3.i
> +  // CHECK: %shuffle6.i = shufflevector <16 x i32> %tmp4, <16 x i32>
> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp5 = icmp sgt <16 x i32> %tmp4, %shuffle6.i
> +  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32>
> %shuffle6.i
> +  // CHECK: %shuffle9.i = shufflevector <16 x i32> %tmp6, <16 x i32>
> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp7 = icmp sgt <16 x i32> %tmp6, %shuffle9.i
> +  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32>
> %shuffle9.i
> +  // CHECK: %tmp9 = bitcast <16 x i32> %tmp8 to <8 x i64>
> +  // CHECK: %vecext.i = extractelement <8 x i64> %tmp9, i32 0
> +  // CHECK: %conv.i = trunc i64 %vecext.i to i32
> +  // CHECK: ret i32 %conv.i
> +  return _mm512_reduce_max_epi32(__W);
> +}
> +
> +unsigned int test_mm512_reduce_max_epu32(__m512i __W){
> +  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
> +  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp, <16 x i32>
> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
> i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp1 = icmp ugt <16 x i32> %tmp, %shuffle1.i
> +  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32>
> %shuffle1.i
> +  // CHECK: %shuffle3.i = shufflevector <16 x i32> %tmp2, <16 x i32>
> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp3 = icmp ugt <16 x i32> %tmp2, %shuffle3.i
> +  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32>
> %shuffle3.i
> +  // CHECK: %shuffle6.i = shufflevector <16 x i32> %tmp4, <16 x i32>
> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp5 = icmp ugt <16 x i32> %tmp4, %shuffle6.i
> +  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32>
> %shuffle6.i
> +  // CHECK: %shuffle9.i = shufflevector <16 x i32> %tmp6, <16 x i32>
> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp7 = icmp ugt <16 x i32> %tmp6, %shuffle9.i
> +  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32>
> %shuffle9.i
> +  // CHECK: %tmp9 = bitcast <16 x i32> %tmp8 to <8 x i64>
> +  // CHECK: %vecext.i = extractelement <8 x i64> %tmp9, i32 0
> +  // CHECK: %conv.i = trunc i64 %vecext.i to i32
> +  // CHECK: ret i32 %conv.i
> +  return _mm512_reduce_max_epu32(__W);
> +}
> +
> +float test_mm512_reduce_max_ps(__m512 __W){
> +  // CHECK: %shuffle1.i = shufflevector <16 x float> %__W, <16 x float>
> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
> i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16
> x float> %__W, <16 x float> %shuffle1.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %shuffle3.i = shufflevector <16 x float> %tmp, <16 x float>
> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp1 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16
> x float> %tmp, <16 x float> %shuffle3.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %shuffle6.i = shufflevector <16 x float> %tmp1, <16 x float>
> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp2 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16
> x float> %tmp1, <16 x float> %shuffle6.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %shuffle9.i = shufflevector <16 x float> %tmp2, <16 x float>
> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp3 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16
> x float> %tmp2, <16 x float> %shuffle9.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %vecext.i = extractelement <16 x float> %tmp3, i32 0
> +  // CHECK: ret float %vecext.i
> +  return _mm512_reduce_max_ps(__W);
> +}
> +
> +int test_mm512_reduce_min_epi32(__m512i __W){
> +  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
> +  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp, <16 x i32>
> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
> i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp1 = icmp slt <16 x i32> %tmp, %shuffle1.i
> +  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32>
> %shuffle1.i
> +  // CHECK: %shuffle3.i = shufflevector <16 x i32> %tmp2, <16 x i32>
> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp3 = icmp slt <16 x i32> %tmp2, %shuffle3.i
> +  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32>
> %shuffle3.i
> +  // CHECK: %shuffle6.i = shufflevector <16 x i32> %tmp4, <16 x i32>
> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp5 = icmp slt <16 x i32> %tmp4, %shuffle6.i
> +  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32>
> %shuffle6.i
> +  // CHECK: %shuffle9.i = shufflevector <16 x i32> %tmp6, <16 x i32>
> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp7 = icmp slt <16 x i32> %tmp6, %shuffle9.i
> +  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32>
> %shuffle9.i
> +  // CHECK: %tmp9 = bitcast <16 x i32> %tmp8 to <8 x i64>
> +  // CHECK: %vecext.i = extractelement <8 x i64> %tmp9, i32 0
> +  // CHECK: %conv.i = trunc i64 %vecext.i to i32
> +  // CHECK: ret i32 %conv.i
> +  return _mm512_reduce_min_epi32(__W);
> +}
> +
> +unsigned int test_mm512_reduce_min_epu32(__m512i __W){
> +  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
> +  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp, <16 x i32>
> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
> i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp1 = icmp ult <16 x i32> %tmp, %shuffle1.i
> +  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32>
> %shuffle1.i
> +  // CHECK: %shuffle3.i = shufflevector <16 x i32> %tmp2, <16 x i32>
> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp3 = icmp ult <16 x i32> %tmp2, %shuffle3.i
> +  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32>
> %shuffle3.i
> +  // CHECK: %shuffle6.i = shufflevector <16 x i32> %tmp4, <16 x i32>
> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp5 = icmp ult <16 x i32> %tmp4, %shuffle6.i
> +  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32>
> %shuffle6.i
> +  // CHECK: %shuffle9.i = shufflevector <16 x i32> %tmp6, <16 x i32>
> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp7 = icmp ult <16 x i32> %tmp6, %shuffle9.i
> +  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32>
> %shuffle9.i
> +  // CHECK: %tmp9 = bitcast <16 x i32> %tmp8 to <8 x i64>
> +  // CHECK: %vecext.i = extractelement <8 x i64> %tmp9, i32 0
> +  // CHECK: %conv.i = trunc i64 %vecext.i to i32
> +  // CHECK: ret i32 %conv.i
> +  return _mm512_reduce_min_epu32(__W);
> +}
> +
> +float test_mm512_reduce_min_ps(__m512 __W){
> +  // CHECK: %shuffle1.i = shufflevector <16 x float> %__W, <16 x float>
> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
> i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16
> x float> %__W, <16 x float> %shuffle1.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %shuffle3.i = shufflevector <16 x float> %tmp, <16 x float>
> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp1 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16
> x float> %tmp, <16 x float> %shuffle3.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %shuffle6.i = shufflevector <16 x float> %tmp1, <16 x float>
> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp2 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16
> x float> %tmp1, <16 x float> %shuffle6.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %shuffle9.i = shufflevector <16 x float> %tmp2, <16 x float>
> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp3 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16
> x float> %tmp2, <16 x float> %shuffle9.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %vecext.i = extractelement <16 x float> %tmp3, i32 0
> +  // CHECK: ret float %vecext.i
> +  return _mm512_reduce_min_ps(__W);
> +}
> +
> +int test_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __W){
> +  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
> +  // CHECK: %tmp1 = bitcast i16 %__M to <16 x i1>
> +  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32>
> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
> -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
> -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32
> -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
> +  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp2, <16 x i32>
> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
> i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp3 = icmp sgt <16 x i32> %tmp2, %shuffle1.i
> +  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32>
> %shuffle1.i
> +  // CHECK: %shuffle4.i = shufflevector <16 x i32> %tmp4, <16 x i32>
> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp5 = icmp sgt <16 x i32> %tmp4, %shuffle4.i
> +  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32>
> %shuffle4.i
> +  // CHECK: %shuffle7.i = shufflevector <16 x i32> %tmp6, <16 x i32>
> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp7 = icmp sgt <16 x i32> %tmp6, %shuffle7.i
> +  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32>
> %shuffle7.i
> +  // CHECK: %shuffle10.i = shufflevector <16 x i32> %tmp8, <16 x i32>
> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp9 = icmp sgt <16 x i32> %tmp8, %shuffle10.i
> +  // CHECK: %tmp10 = select <16 x i1> %tmp9, <16 x i32> %tmp8, <16 x i32>
> %shuffle10.i
> +  // CHECK: %tmp11 = bitcast <16 x i32> %tmp10 to <8 x i64>
> +  // CHECK: %vecext.i = extractelement <8 x i64> %tmp11, i32 0
> +  // CHECK: %conv.i = trunc i64 %vecext.i to i32
> +  // CHECK: ret i32 %conv.i
> +  return _mm512_mask_reduce_max_epi32(__M, __W);
> +}
> +
> +unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i
> __W){
> +  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
> +  // CHECK: %tmp1 = bitcast i16 %__M to <16 x i1>
> +  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32>
> zeroinitializer
> +  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp2, <16 x i32>
> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
> i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp3 = icmp ugt <16 x i32> %tmp2, %shuffle1.i
> +  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32>
> %shuffle1.i
> +  // CHECK: %shuffle4.i = shufflevector <16 x i32> %tmp4, <16 x i32>
> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp5 = icmp ugt <16 x i32> %tmp4, %shuffle4.i
> +  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32>
> %shuffle4.i
> +  // CHECK: %shuffle7.i = shufflevector <16 x i32> %tmp6, <16 x i32>
> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp7 = icmp ugt <16 x i32> %tmp6, %shuffle7.i
> +  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32>
> %shuffle7.i
> +  // CHECK: %shuffle10.i = shufflevector <16 x i32> %tmp8, <16 x i32>
> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp9 = icmp ugt <16 x i32> %tmp8, %shuffle10.i
> +  // CHECK: %tmp10 = select <16 x i1> %tmp9, <16 x i32> %tmp8, <16 x i32>
> %shuffle10.i
> +  // CHECK: %tmp11 = bitcast <16 x i32> %tmp10 to <8 x i64>
> +  // CHECK: %vecext.i = extractelement <8 x i64> %tmp11, i32 0
> +  // CHECK: %conv.i = trunc i64 %vecext.i to i32
> +  // CHECK: ret i32 %conv.i
> +  return _mm512_mask_reduce_max_epu32(__M, __W);
> +}
> +
> +float test_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __W){
> +  // CHECK: %tmp = bitcast i16 %__M to <16 x i1>
> +  // CHECK: %tmp1 = select <16 x i1> %tmp, <16 x float> %__W, <16 x
> float> <float 0x41EFF00000000000, float 0x41EFF00000000000, float
> 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000,
> float 0x41EFF00000000000, float 0x41EFF00000000000, float
> 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000,
> float 0x41EFF00000000000, float 0x41EFF00000000000, float
> 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000,
> float 0x41EFF00000000000>
> +  // CHECK: %shuffle1.i = shufflevector <16 x float> %tmp1, <16 x float>
> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
> i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp2 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16
> x float> %tmp1, <16 x float> %shuffle1.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %shuffle4.i = shufflevector <16 x float> %tmp2, <16 x float>
> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp3 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16
> x float> %tmp2, <16 x float> %shuffle4.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %shuffle7.i = shufflevector <16 x float> %tmp3, <16 x float>
> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp4 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16
> x float> %tmp3, <16 x float> %shuffle7.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %shuffle10.i = shufflevector <16 x float> %tmp4, <16 x float>
> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp5 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16
> x float> %tmp4, <16 x float> %shuffle10.i, <16 x float> zeroinitializer,
> i16 -1, i32 4) #3
> +  // CHECK: %vecext.i = extractelement <16 x float> %tmp5, i32 0
> +  // CHECK: ret float %vecext.i
> +  return _mm512_mask_reduce_max_ps(__M, __W);
> +}
> +
> +int test_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __W){
> +  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
> +  // CHECK: %tmp1 = bitcast i16 %__M to <16 x i1>
> +  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32>
> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32
> 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32
> 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32
> 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
> +  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp2, <16 x i32>
> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
> i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp3 = icmp slt <16 x i32> %tmp2, %shuffle1.i
> +  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32>
> %shuffle1.i
> +  // CHECK: %shuffle4.i = shufflevector <16 x i32> %tmp4, <16 x i32>
> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp5 = icmp slt <16 x i32> %tmp4, %shuffle4.i
> +  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32>
> %shuffle4.i
> +  // CHECK: %shuffle7.i = shufflevector <16 x i32> %tmp6, <16 x i32>
> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp7 = icmp slt <16 x i32> %tmp6, %shuffle7.i
> +  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32>
> %shuffle7.i
> +  // CHECK: %shuffle10.i = shufflevector <16 x i32> %tmp8, <16 x i32>
> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp9 = icmp slt <16 x i32> %tmp8, %shuffle10.i
> +  // CHECK: %tmp10 = select <16 x i1> %tmp9, <16 x i32> %tmp8, <16 x i32>
> %shuffle10.i
> +  // CHECK: %tmp11 = bitcast <16 x i32> %tmp10 to <8 x i64>
> +  // CHECK: %vecext.i = extractelement <8 x i64> %tmp11, i32 0
> +  // CHECK: %conv.i = trunc i64 %vecext.i to i32
> +  // CHECK: ret i32 %conv.i
> +  return _mm512_mask_reduce_min_epi32(__M, __W);
> +}
> +
> +unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i
> __W){
> +  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
> +  // CHECK: %tmp1 = bitcast i16 %__M to <16 x i1>
> +  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32>
> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1,
> i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
> +  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp2, <16 x i32>
> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
> i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp3 = icmp ult <16 x i32> %tmp2, %shuffle1.i
> +  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32>
> %shuffle1.i
> +  // CHECK: %shuffle4.i = shufflevector <16 x i32> %tmp4, <16 x i32>
> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp5 = icmp ult <16 x i32> %tmp4, %shuffle4.i
> +  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32>
> %shuffle4.i
> +  // CHECK: %shuffle7.i = shufflevector <16 x i32> %tmp6, <16 x i32>
> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp7 = icmp ult <16 x i32> %tmp6, %shuffle7.i
> +  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32>
> %shuffle7.i
> +  // CHECK: %shuffle10.i = shufflevector <16 x i32> %tmp8, <16 x i32>
> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp9 = icmp ult <16 x i32> %tmp8, %shuffle10.i
> +  // CHECK: %tmp10 = select <16 x i1> %tmp9, <16 x i32> %tmp8, <16 x i32>
> %shuffle10.i
> +  // CHECK: %tmp11 = bitcast <16 x i32> %tmp10 to <8 x i64>
> +  // CHECK: %vecext.i = extractelement <8 x i64> %tmp11, i32 0
> +  // CHECK: %conv.i = trunc i64 %vecext.i to i32
> +  // CHECK: ret i32 %conv.i
> +  return _mm512_mask_reduce_min_epu32(__M, __W);
> +}
> +
> +float test_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __W){
> +  // CHECK: %tmp = bitcast i16 %__M to <16 x i1>
> +  // CHECK: %tmp1 = select <16 x i1> %tmp, <16 x float> %__W, <16 x
> float> <float 0x41DFE00000000000, float 0x41DFE00000000000, float
> 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000,
> float 0x41DFE00000000000, float 0x41DFE00000000000, float
> 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000,
> float 0x41DFE00000000000, float 0x41DFE00000000000, float
> 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000,
> float 0x41DFE00000000000>
> +  // CHECK: %shuffle1.i = shufflevector <16 x float> %tmp1, <16 x float>
> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
> i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef>
> +  // CHECK: %tmp2 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16
> x float> %tmp1, <16 x float> %shuffle1.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %shuffle4.i = shufflevector <16 x float> %tmp2, <16 x float>
> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp3 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16
> x float> %tmp2, <16 x float> %shuffle4.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %shuffle7.i = shufflevector <16 x float> %tmp3, <16 x float>
> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp4 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16
> x float> %tmp3, <16 x float> %shuffle7.i, <16 x float> zeroinitializer, i16
> -1, i32 4) #3
> +  // CHECK: %shuffle10.i = shufflevector <16 x float> %tmp4, <16 x float>
> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef, i32 undef, i32 undef, i32 undef>
> +  // CHECK: %tmp5 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16
> x float> %tmp4, <16 x float> %shuffle10.i, <16 x float> zeroinitializer,
> i16 -1, i32 4) #3
> +  // CHECK: %vecext.i = extractelement <16 x float> %tmp5, i32 0
> +  // CHECK: ret float %vecext.i
> +  return _mm512_mask_reduce_min_ps(__M, __W);
> +}
> +
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>