[llvm] r346538 - [CostModel][X86] SK_ExtractSubvector is free if the subvector is at the start of the source vector

George Burgess IV via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 9 19:30:16 PST 2018


Happy Friday (night)!

It looks like this change either causes or unmasks a bug in LLVM
(http://llvm.org/pr39615). I don't know our vector or machine bits
well, so can you please help look into this? :)

Thank you,
George

On Fri, Nov 9, 2018 at 11:06 AM Simon Pilgrim via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: rksimon
> Date: Fri Nov  9 11:04:27 2018
> New Revision: 346538
>
> URL: http://llvm.org/viewvc/llvm-project?rev=346538&view=rev
> Log:
> [CostModel][X86] SK_ExtractSubvector is free if the subvector is at the
> start of the source vector
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
>     llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll
>     llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll
>     llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll
>     llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll
>     llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll
>     llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll
>     llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll
>     llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll
>     llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll
>     llvm/trunk/test/Analysis/CostModel/X86/reduction.ll
>     llvm/trunk/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
>     llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Fri Nov  9
> 11:04:27 2018
> @@ -872,6 +872,12 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
>    if (Kind == TTI::SK_Broadcast)
>      LT.first = 1;
>
> +  // Subvector extractions are free if they start at beginning of the
> +  // vector.
> +  if (Kind == TTI::SK_ExtractSubvector &&
> +      ((Index % LT.second.getVectorNumElements()) == 0))
> +    return 0;
> +
>    // We are going to permute multiple sources and the result will be in
> multiple
>    // destinations. Providing an accurate cost only for splits where the
> element
>    // type remains the same.
> @@ -909,15 +915,15 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
>    }
>
>    static const CostTblEntry AVX512VBMIShuffleTbl[] = {
> -    { TTI::SK_Reverse,          MVT::v64i8,  1 }, // vpermb
> -    { TTI::SK_Reverse,          MVT::v32i8,  1 }, // vpermb
> +      {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
> +      {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb
>
> -    { TTI::SK_PermuteSingleSrc, MVT::v64i8,  1 }, // vpermb
> -    { TTI::SK_PermuteSingleSrc, MVT::v32i8,  1 }, // vpermb
> +      {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, // vpermb
> +      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpermb
>
> -    { TTI::SK_PermuteTwoSrc,    MVT::v64i8,  1 }, // vpermt2b
> -    { TTI::SK_PermuteTwoSrc,    MVT::v32i8,  1 }, // vpermt2b
> -    { TTI::SK_PermuteTwoSrc,    MVT::v16i8,  1 }  // vpermt2b
> +      {TTI::SK_PermuteTwoSrc, MVT::v64i8, 1}, // vpermt2b
> +      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 1}, // vpermt2b
> +      {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1}  // vpermt2b
>    };
>
>    if (ST->hasVBMI())
> @@ -926,25 +932,25 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
>        return LT.first * Entry->Cost;
>
>    static const CostTblEntry AVX512BWShuffleTbl[] = {
> -    { TTI::SK_Broadcast,        MVT::v32i16, 1 }, // vpbroadcastw
> -    { TTI::SK_Broadcast,        MVT::v64i8,  1 }, // vpbroadcastb
> +      {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
> +      {TTI::SK_Broadcast, MVT::v64i8, 1},  // vpbroadcastb
>
> -    { TTI::SK_Reverse,          MVT::v32i16, 1 }, // vpermw
> -    { TTI::SK_Reverse,          MVT::v16i16, 1 }, // vpermw
> -    { TTI::SK_Reverse,          MVT::v64i8,  2 }, // pshufb + vshufi64x2
> -
> -    { TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
> -    { TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw
> -    { TTI::SK_PermuteSingleSrc, MVT::v8i16,  1 }, // vpermw
> -    { TTI::SK_PermuteSingleSrc, MVT::v64i8,  8 }, // extend to v32i16
> -    { TTI::SK_PermuteSingleSrc, MVT::v32i8,  3 }, // vpermw + zext/trunc
> -
> -    { TTI::SK_PermuteTwoSrc,    MVT::v32i16, 1 }, // vpermt2w
> -    { TTI::SK_PermuteTwoSrc,    MVT::v16i16, 1 }, // vpermt2w
> -    { TTI::SK_PermuteTwoSrc,    MVT::v8i16,  1 }, // vpermt2w
> -    { TTI::SK_PermuteTwoSrc,    MVT::v32i8,  3 }, // zext + vpermt2w +
> trunc
> -    { TTI::SK_PermuteTwoSrc,    MVT::v64i8, 19 }, // 6 * v32i8 + 1
> -    { TTI::SK_PermuteTwoSrc,    MVT::v16i8,  3 }  // zext + vpermt2w +
> trunc
> +      {TTI::SK_Reverse, MVT::v32i16, 1}, // vpermw
> +      {TTI::SK_Reverse, MVT::v16i16, 1}, // vpermw
> +      {TTI::SK_Reverse, MVT::v64i8, 2},  // pshufb + vshufi64x2
> +
> +      {TTI::SK_PermuteSingleSrc, MVT::v32i16, 1}, // vpermw
> +      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 1}, // vpermw
> +      {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1},  // vpermw
> +      {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8},  // extend to v32i16
> +      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 3},  // vpermw + zext/trunc
> +
> +      {TTI::SK_PermuteTwoSrc, MVT::v32i16, 1}, // vpermt2w
> +      {TTI::SK_PermuteTwoSrc, MVT::v16i16, 1}, // vpermt2w
> +      {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1},  // vpermt2w
> +      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 3},  // zext + vpermt2w + trunc
> +      {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1
> +      {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}   // zext + vpermt2w + trunc
>    };
>
>    if (ST->hasBWI())
> @@ -953,42 +959,42 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
>        return LT.first * Entry->Cost;
>
>    static const CostTblEntry AVX512ShuffleTbl[] = {
> -    { TTI::SK_Broadcast,        MVT::v8f64,  1 }, // vbroadcastpd
> -    { TTI::SK_Broadcast,        MVT::v16f32, 1 }, // vbroadcastps
> -    { TTI::SK_Broadcast,        MVT::v8i64,  1 }, // vpbroadcastq
> -    { TTI::SK_Broadcast,        MVT::v16i32, 1 }, // vpbroadcastd
> -
> -    { TTI::SK_Reverse,          MVT::v8f64,  1 }, // vpermpd
> -    { TTI::SK_Reverse,          MVT::v16f32, 1 }, // vpermps
> -    { TTI::SK_Reverse,          MVT::v8i64,  1 }, // vpermq
> -    { TTI::SK_Reverse,          MVT::v16i32, 1 }, // vpermd
> -
> -    { TTI::SK_PermuteSingleSrc, MVT::v8f64,  1 }, // vpermpd
> -    { TTI::SK_PermuteSingleSrc, MVT::v4f64,  1 }, // vpermpd
> -    { TTI::SK_PermuteSingleSrc, MVT::v2f64,  1 }, // vpermpd
> -    { TTI::SK_PermuteSingleSrc, MVT::v16f32, 1 }, // vpermps
> -    { TTI::SK_PermuteSingleSrc, MVT::v8f32,  1 }, // vpermps
> -    { TTI::SK_PermuteSingleSrc, MVT::v4f32,  1 }, // vpermps
> -    { TTI::SK_PermuteSingleSrc, MVT::v8i64,  1 }, // vpermq
> -    { TTI::SK_PermuteSingleSrc, MVT::v4i64,  1 }, // vpermq
> -    { TTI::SK_PermuteSingleSrc, MVT::v2i64,  1 }, // vpermq
> -    { TTI::SK_PermuteSingleSrc, MVT::v16i32, 1 }, // vpermd
> -    { TTI::SK_PermuteSingleSrc, MVT::v8i32,  1 }, // vpermd
> -    { TTI::SK_PermuteSingleSrc, MVT::v4i32,  1 }, // vpermd
> -    { TTI::SK_PermuteSingleSrc, MVT::v16i8,  1 }, // pshufb
> -
> -    { TTI::SK_PermuteTwoSrc,    MVT::v8f64,  1 }, // vpermt2pd
> -    { TTI::SK_PermuteTwoSrc,    MVT::v16f32, 1 }, // vpermt2ps
> -    { TTI::SK_PermuteTwoSrc,    MVT::v8i64,  1 }, // vpermt2q
> -    { TTI::SK_PermuteTwoSrc,    MVT::v16i32, 1 }, // vpermt2d
> -    { TTI::SK_PermuteTwoSrc,    MVT::v4f64,  1 }, // vpermt2pd
> -    { TTI::SK_PermuteTwoSrc,    MVT::v8f32,  1 }, // vpermt2ps
> -    { TTI::SK_PermuteTwoSrc,    MVT::v4i64,  1 }, // vpermt2q
> -    { TTI::SK_PermuteTwoSrc,    MVT::v8i32,  1 }, // vpermt2d
> -    { TTI::SK_PermuteTwoSrc,    MVT::v2f64,  1 }, // vpermt2pd
> -    { TTI::SK_PermuteTwoSrc,    MVT::v4f32,  1 }, // vpermt2ps
> -    { TTI::SK_PermuteTwoSrc,    MVT::v2i64,  1 }, // vpermt2q
> -    { TTI::SK_PermuteTwoSrc,    MVT::v4i32,  1 }  // vpermt2d
> +      {TTI::SK_Broadcast, MVT::v8f64, 1},  // vbroadcastpd
> +      {TTI::SK_Broadcast, MVT::v16f32, 1}, // vbroadcastps
> +      {TTI::SK_Broadcast, MVT::v8i64, 1},  // vpbroadcastq
> +      {TTI::SK_Broadcast, MVT::v16i32, 1}, // vpbroadcastd
> +
> +      {TTI::SK_Reverse, MVT::v8f64, 1},  // vpermpd
> +      {TTI::SK_Reverse, MVT::v16f32, 1}, // vpermps
> +      {TTI::SK_Reverse, MVT::v8i64, 1},  // vpermq
> +      {TTI::SK_Reverse, MVT::v16i32, 1}, // vpermd
> +
> +      {TTI::SK_PermuteSingleSrc, MVT::v8f64, 1},  // vpermpd
> +      {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1},  // vpermpd
> +      {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1},  // vpermpd
> +      {TTI::SK_PermuteSingleSrc, MVT::v16f32, 1}, // vpermps
> +      {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1},  // vpermps
> +      {TTI::SK_PermuteSingleSrc, MVT::v4f32, 1},  // vpermps
> +      {TTI::SK_PermuteSingleSrc, MVT::v8i64, 1},  // vpermq
> +      {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1},  // vpermq
> +      {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1},  // vpermq
> +      {TTI::SK_PermuteSingleSrc, MVT::v16i32, 1}, // vpermd
> +      {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1},  // vpermd
> +      {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1},  // vpermd
> +      {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1},  // pshufb
> +
> +      {TTI::SK_PermuteTwoSrc, MVT::v8f64, 1},  // vpermt2pd
> +      {TTI::SK_PermuteTwoSrc, MVT::v16f32, 1}, // vpermt2ps
> +      {TTI::SK_PermuteTwoSrc, MVT::v8i64, 1},  // vpermt2q
> +      {TTI::SK_PermuteTwoSrc, MVT::v16i32, 1}, // vpermt2d
> +      {TTI::SK_PermuteTwoSrc, MVT::v4f64, 1},  // vpermt2pd
> +      {TTI::SK_PermuteTwoSrc, MVT::v8f32, 1},  // vpermt2ps
> +      {TTI::SK_PermuteTwoSrc, MVT::v4i64, 1},  // vpermt2q
> +      {TTI::SK_PermuteTwoSrc, MVT::v8i32, 1},  // vpermt2d
> +      {TTI::SK_PermuteTwoSrc, MVT::v2f64, 1},  // vpermt2pd
> +      {TTI::SK_PermuteTwoSrc, MVT::v4f32, 1},  // vpermt2ps
> +      {TTI::SK_PermuteTwoSrc, MVT::v2i64, 1},  // vpermt2q
> +      {TTI::SK_PermuteTwoSrc, MVT::v4i32, 1}   // vpermt2d
>    };
>
>    if (ST->hasAVX512())
> @@ -996,40 +1002,40 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
>        return LT.first * Entry->Cost;
>
>    static const CostTblEntry AVX2ShuffleTbl[] = {
> -    { TTI::SK_Broadcast, MVT::v4f64,  1 }, // vbroadcastpd
> -    { TTI::SK_Broadcast, MVT::v8f32,  1 }, // vbroadcastps
> -    { TTI::SK_Broadcast, MVT::v4i64,  1 }, // vpbroadcastq
> -    { TTI::SK_Broadcast, MVT::v8i32,  1 }, // vpbroadcastd
> -    { TTI::SK_Broadcast, MVT::v16i16, 1 }, // vpbroadcastw
> -    { TTI::SK_Broadcast, MVT::v32i8,  1 }, // vpbroadcastb
> -
> -    { TTI::SK_Reverse,   MVT::v4f64,  1 }, // vpermpd
> -    { TTI::SK_Reverse,   MVT::v8f32,  1 }, // vpermps
> -    { TTI::SK_Reverse,   MVT::v4i64,  1 }, // vpermq
> -    { TTI::SK_Reverse,   MVT::v8i32,  1 }, // vpermd
> -    { TTI::SK_Reverse,   MVT::v16i16, 2 }, // vperm2i128 + pshufb
> -    { TTI::SK_Reverse,   MVT::v32i8,  2 }, // vperm2i128 + pshufb
> -
> -    { TTI::SK_Select,    MVT::v16i16, 1 }, // vpblendvb
> -    { TTI::SK_Select,    MVT::v32i8,  1 }, // vpblendvb
> -
> -    { TTI::SK_PermuteSingleSrc, MVT::v4f64,  1 }, // vpermpd
> -    { TTI::SK_PermuteSingleSrc, MVT::v8f32,  1 }, // vpermps
> -    { TTI::SK_PermuteSingleSrc, MVT::v4i64,  1 }, // vpermq
> -    { TTI::SK_PermuteSingleSrc, MVT::v8i32,  1 }, // vpermd
> -    { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 +
> 2*vpshufb
> +      {TTI::SK_Broadcast, MVT::v4f64, 1},  // vbroadcastpd
> +      {TTI::SK_Broadcast, MVT::v8f32, 1},  // vbroadcastps
> +      {TTI::SK_Broadcast, MVT::v4i64, 1},  // vpbroadcastq
> +      {TTI::SK_Broadcast, MVT::v8i32, 1},  // vpbroadcastd
> +      {TTI::SK_Broadcast, MVT::v16i16, 1}, // vpbroadcastw
> +      {TTI::SK_Broadcast, MVT::v32i8, 1},  // vpbroadcastb
> +
> +      {TTI::SK_Reverse, MVT::v4f64, 1},  // vpermpd
> +      {TTI::SK_Reverse, MVT::v8f32, 1},  // vpermps
> +      {TTI::SK_Reverse, MVT::v4i64, 1},  // vpermq
> +      {TTI::SK_Reverse, MVT::v8i32, 1},  // vpermd
> +      {TTI::SK_Reverse, MVT::v16i16, 2}, // vperm2i128 + pshufb
> +      {TTI::SK_Reverse, MVT::v32i8, 2},  // vperm2i128 + pshufb
> +
> +      {TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb
> +      {TTI::SK_Select, MVT::v32i8, 1},  // vpblendvb
> +
> +      {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1},  // vpermpd
> +      {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1},  // vpermps
> +      {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1},  // vpermq
> +      {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1},  // vpermd
> +      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vperm2i128 +
> 2*vpshufb
>                                                    // + vpblendvb
> -    { TTI::SK_PermuteSingleSrc, MVT::v32i8,  4 }, // vperm2i128 +
> 2*vpshufb
> +      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4},  // vperm2i128 +
> 2*vpshufb
>                                                    // + vpblendvb
>
> -    { TTI::SK_PermuteTwoSrc,    MVT::v4f64,  3 }, // 2*vpermpd + vblendpd
> -    { TTI::SK_PermuteTwoSrc,    MVT::v8f32,  3 }, // 2*vpermps + vblendps
> -    { TTI::SK_PermuteTwoSrc,    MVT::v4i64,  3 }, // 2*vpermq + vpblendd
> -    { TTI::SK_PermuteTwoSrc,    MVT::v8i32,  3 }, // 2*vpermd + vpblendd
> -    { TTI::SK_PermuteTwoSrc,    MVT::v16i16, 7 }, // 2*vperm2i128 +
> 4*vpshufb
> -                                                  // + vpblendvb
> -    { TTI::SK_PermuteTwoSrc,    MVT::v32i8,  7 }, // 2*vperm2i128 +
> 4*vpshufb
> -                                                  // + vpblendvb
> +      {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3},  // 2*vpermpd + vblendpd
> +      {TTI::SK_PermuteTwoSrc, MVT::v8f32, 3},  // 2*vpermps + vblendps
> +      {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3},  // 2*vpermq + vpblendd
> +      {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3},  // 2*vpermd + vpblendd
> +      {TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, // 2*vperm2i128 + 4*vpshufb
> +                                               // + vpblendvb
> +      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7},  // 2*vperm2i128 + 4*vpshufb
> +                                               // + vpblendvb
>    };
>
>    if (ST->hasAVX2())
> @@ -1037,21 +1043,21 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
>        return LT.first * Entry->Cost;
>
>    static const CostTblEntry XOPShuffleTbl[] = {
> -    { TTI::SK_PermuteSingleSrc, MVT::v4f64,   2 }, // vperm2f128 +
> vpermil2pd
> -    { TTI::SK_PermuteSingleSrc, MVT::v8f32,   2 }, // vperm2f128 +
> vpermil2ps
> -    { TTI::SK_PermuteSingleSrc, MVT::v4i64,   2 }, // vperm2f128 +
> vpermil2pd
> -    { TTI::SK_PermuteSingleSrc, MVT::v8i32,   2 }, // vperm2f128 +
> vpermil2ps
> -    { TTI::SK_PermuteSingleSrc, MVT::v16i16,  4 }, // vextractf128 +
> 2*vpperm
> -                                                   // + vinsertf128
> -    { TTI::SK_PermuteSingleSrc, MVT::v32i8,   4 }, // vextractf128 +
> 2*vpperm
> -                                                   // + vinsertf128
> -
> -    { TTI::SK_PermuteTwoSrc,    MVT::v16i16,  9 }, // 2*vextractf128 +
> 6*vpperm
> -                                                   // + vinsertf128
> -    { TTI::SK_PermuteTwoSrc,    MVT::v8i16,   1 }, // vpperm
> -    { TTI::SK_PermuteTwoSrc,    MVT::v32i8,   9 }, // 2*vextractf128 +
> 6*vpperm
> -                                                   // + vinsertf128
> -    { TTI::SK_PermuteTwoSrc,    MVT::v16i8,   1 }, // vpperm
> +      {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2},  // vperm2f128 +
> vpermil2pd
> +      {TTI::SK_PermuteSingleSrc, MVT::v8f32, 2},  // vperm2f128 +
> vpermil2ps
> +      {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2},  // vperm2f128 +
> vpermil2pd
> +      {TTI::SK_PermuteSingleSrc, MVT::v8i32, 2},  // vperm2f128 +
> vpermil2ps
> +      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vextractf128 +
> 2*vpperm
> +                                                  // + vinsertf128
> +      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4},  // vextractf128 +
> 2*vpperm
> +                                                  // + vinsertf128
> +
> +      {TTI::SK_PermuteTwoSrc, MVT::v16i16, 9}, // 2*vextractf128 +
> 6*vpperm
> +                                               // + vinsertf128
> +      {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1},  // vpperm
> +      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 9},  // 2*vextractf128 +
> 6*vpperm
> +                                               // + vinsertf128
> +      {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1},  // vpperm
>    };
>
>    if (ST->hasXOP())
> @@ -1059,46 +1065,46 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
>        return LT.first * Entry->Cost;
>
>    static const CostTblEntry AVX1ShuffleTbl[] = {
> -    { TTI::SK_Broadcast, MVT::v4f64,  2 }, // vperm2f128 + vpermilpd
> -    { TTI::SK_Broadcast, MVT::v8f32,  2 }, // vperm2f128 + vpermilps
> -    { TTI::SK_Broadcast, MVT::v4i64,  2 }, // vperm2f128 + vpermilpd
> -    { TTI::SK_Broadcast, MVT::v8i32,  2 }, // vperm2f128 + vpermilps
> -    { TTI::SK_Broadcast, MVT::v16i16, 3 }, // vpshuflw + vpshufd +
> vinsertf128
> -    { TTI::SK_Broadcast, MVT::v32i8,  2 }, // vpshufb + vinsertf128
> -
> -    { TTI::SK_Reverse,   MVT::v4f64,  2 }, // vperm2f128 + vpermilpd
> -    { TTI::SK_Reverse,   MVT::v8f32,  2 }, // vperm2f128 + vpermilps
> -    { TTI::SK_Reverse,   MVT::v4i64,  2 }, // vperm2f128 + vpermilpd
> -    { TTI::SK_Reverse,   MVT::v8i32,  2 }, // vperm2f128 + vpermilps
> -    { TTI::SK_Reverse,   MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
> -                                           // + vinsertf128
> -    { TTI::SK_Reverse,   MVT::v32i8,  4 }, // vextractf128 + 2*pshufb
> -                                           // + vinsertf128
> -
> -    { TTI::SK_Select,    MVT::v4i64,  1 }, // vblendpd
> -    { TTI::SK_Select,    MVT::v4f64,  1 }, // vblendpd
> -    { TTI::SK_Select,    MVT::v8i32,  1 }, // vblendps
> -    { TTI::SK_Select,    MVT::v8f32,  1 }, // vblendps
> -    { TTI::SK_Select,    MVT::v16i16, 3 }, // vpand + vpandn + vpor
> -    { TTI::SK_Select,    MVT::v32i8,  3 }, // vpand + vpandn + vpor
> -
> -    { TTI::SK_PermuteSingleSrc, MVT::v4f64,  2 }, // vperm2f128 + vshufpd
> -    { TTI::SK_PermuteSingleSrc, MVT::v4i64,  2 }, // vperm2f128 + vshufpd
> -    { TTI::SK_PermuteSingleSrc, MVT::v8f32,  4 }, // 2*vperm2f128 +
> 2*vshufps
> -    { TTI::SK_PermuteSingleSrc, MVT::v8i32,  4 }, // 2*vperm2f128 +
> 2*vshufps
> -    { TTI::SK_PermuteSingleSrc, MVT::v16i16, 8 }, // vextractf128 +
> 4*pshufb
> +      {TTI::SK_Broadcast, MVT::v4f64, 2},  // vperm2f128 + vpermilpd
> +      {TTI::SK_Broadcast, MVT::v8f32, 2},  // vperm2f128 + vpermilps
> +      {TTI::SK_Broadcast, MVT::v4i64, 2},  // vperm2f128 + vpermilpd
> +      {TTI::SK_Broadcast, MVT::v8i32, 2},  // vperm2f128 + vpermilps
> +      {TTI::SK_Broadcast, MVT::v16i16, 3}, // vpshuflw + vpshufd +
> vinsertf128
> +      {TTI::SK_Broadcast, MVT::v32i8, 2},  // vpshufb + vinsertf128
> +
> +      {TTI::SK_Reverse, MVT::v4f64, 2},  // vperm2f128 + vpermilpd
> +      {TTI::SK_Reverse, MVT::v8f32, 2},  // vperm2f128 + vpermilps
> +      {TTI::SK_Reverse, MVT::v4i64, 2},  // vperm2f128 + vpermilpd
> +      {TTI::SK_Reverse, MVT::v8i32, 2},  // vperm2f128 + vpermilps
> +      {TTI::SK_Reverse, MVT::v16i16, 4}, // vextractf128 + 2*pshufb
> +                                         // + vinsertf128
> +      {TTI::SK_Reverse, MVT::v32i8, 4},  // vextractf128 + 2*pshufb
> +                                         // + vinsertf128
> +
> +      {TTI::SK_Select, MVT::v4i64, 1},  // vblendpd
> +      {TTI::SK_Select, MVT::v4f64, 1},  // vblendpd
> +      {TTI::SK_Select, MVT::v8i32, 1},  // vblendps
> +      {TTI::SK_Select, MVT::v8f32, 1},  // vblendps
> +      {TTI::SK_Select, MVT::v16i16, 3}, // vpand + vpandn + vpor
> +      {TTI::SK_Select, MVT::v32i8, 3},  // vpand + vpandn + vpor
> +
> +      {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2},  // vperm2f128 + vshufpd
> +      {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2},  // vperm2f128 + vshufpd
> +      {TTI::SK_PermuteSingleSrc, MVT::v8f32, 4},  // 2*vperm2f128 +
> 2*vshufps
> +      {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4},  // 2*vperm2f128 +
> 2*vshufps
> +      {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, // vextractf128 +
> 4*pshufb
>                                                    // + 2*por + vinsertf128
> -    { TTI::SK_PermuteSingleSrc, MVT::v32i8,  8 }, // vextractf128 +
> 4*pshufb
> +      {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8},  // vextractf128 +
> 4*pshufb
>                                                    // + 2*por + vinsertf128
>
> -    { TTI::SK_PermuteTwoSrc,    MVT::v4f64,   3 }, // 2*vperm2f128 +
> vshufpd
> -    { TTI::SK_PermuteTwoSrc,    MVT::v4i64,   3 }, // 2*vperm2f128 +
> vshufpd
> -    { TTI::SK_PermuteTwoSrc,    MVT::v8f32,   4 }, // 2*vperm2f128 +
> 2*vshufps
> -    { TTI::SK_PermuteTwoSrc,    MVT::v8i32,   4 }, // 2*vperm2f128 +
> 2*vshufps
> -    { TTI::SK_PermuteTwoSrc,    MVT::v16i16, 15 }, // 2*vextractf128 +
> 8*pshufb
> -                                                   // + 4*por +
> vinsertf128
> -    { TTI::SK_PermuteTwoSrc,    MVT::v32i8,  15 }, // 2*vextractf128 +
> 8*pshufb
> -                                                   // + 4*por +
> vinsertf128
> +      {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3},   // 2*vperm2f128 + vshufpd
> +      {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3},   // 2*vperm2f128 + vshufpd
> +      {TTI::SK_PermuteTwoSrc, MVT::v8f32, 4},   // 2*vperm2f128 +
> 2*vshufps
> +      {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4},   // 2*vperm2f128 +
> 2*vshufps
> +      {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, // 2*vextractf128 +
> 8*pshufb
> +                                                // + 4*por + vinsertf128
> +      {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15},  // 2*vextractf128 +
> 8*pshufb
> +                                                // + 4*por + vinsertf128
>    };
>
>    if (ST->hasAVX())
> @@ -1106,12 +1112,12 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
>        return LT.first * Entry->Cost;
>
>    static const CostTblEntry SSE41ShuffleTbl[] = {
> -    { TTI::SK_Select,    MVT::v2i64,  1 }, // pblendw
> -    { TTI::SK_Select,    MVT::v2f64,  1 }, // movsd
> -    { TTI::SK_Select,    MVT::v4i32,  1 }, // pblendw
> -    { TTI::SK_Select,    MVT::v4f32,  1 }, // blendps
> -    { TTI::SK_Select,    MVT::v8i16,  1 }, // pblendw
> -    { TTI::SK_Select,    MVT::v16i8,  1 }  // pblendvb
> +      {TTI::SK_Select, MVT::v2i64, 1}, // pblendw
> +      {TTI::SK_Select, MVT::v2f64, 1}, // movsd
> +      {TTI::SK_Select, MVT::v4i32, 1}, // pblendw
> +      {TTI::SK_Select, MVT::v4f32, 1}, // blendps
> +      {TTI::SK_Select, MVT::v8i16, 1}, // pblendw
> +      {TTI::SK_Select, MVT::v16i8, 1}  // pblendvb
>    };
>
>    if (ST->hasSSE41())
> @@ -1119,20 +1125,20 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
>        return LT.first * Entry->Cost;
>
>    static const CostTblEntry SSSE3ShuffleTbl[] = {
> -    { TTI::SK_Broadcast, MVT::v8i16,  1 }, // pshufb
> -    { TTI::SK_Broadcast, MVT::v16i8,  1 }, // pshufb
> +      {TTI::SK_Broadcast, MVT::v8i16, 1}, // pshufb
> +      {TTI::SK_Broadcast, MVT::v16i8, 1}, // pshufb
>
> -    { TTI::SK_Reverse,   MVT::v8i16,  1 }, // pshufb
> -    { TTI::SK_Reverse,   MVT::v16i8,  1 }, // pshufb
> +      {TTI::SK_Reverse, MVT::v8i16, 1}, // pshufb
> +      {TTI::SK_Reverse, MVT::v16i8, 1}, // pshufb
>
> -    { TTI::SK_Select,    MVT::v8i16,  3 }, // 2*pshufb + por
> -    { TTI::SK_Select,    MVT::v16i8,  3 }, // 2*pshufb + por
> +      {TTI::SK_Select, MVT::v8i16, 3}, // 2*pshufb + por
> +      {TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por
>
> -    { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb
> -    { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
> +      {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb
> +      {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
>
> -    { TTI::SK_PermuteTwoSrc,    MVT::v8i16, 3 }, // 2*pshufb + por
> -    { TTI::SK_PermuteTwoSrc,    MVT::v16i8, 3 }, // 2*pshufb + por
> +      {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, // 2*pshufb + por
> +      {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, // 2*pshufb + por
>    };
>
>    if (ST->hasSSSE3())
> @@ -1140,29 +1146,29 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
>        return LT.first * Entry->Cost;
>
>    static const CostTblEntry SSE2ShuffleTbl[] = {
> -    { TTI::SK_Broadcast, MVT::v2f64,  1 }, // shufpd
> -    { TTI::SK_Broadcast, MVT::v2i64,  1 }, // pshufd
> -    { TTI::SK_Broadcast, MVT::v4i32,  1 }, // pshufd
> -    { TTI::SK_Broadcast, MVT::v8i16,  2 }, // pshuflw + pshufd
> -    { TTI::SK_Broadcast, MVT::v16i8,  3 }, // unpck + pshuflw + pshufd
> -
> -    { TTI::SK_Reverse,   MVT::v2f64,  1 }, // shufpd
> -    { TTI::SK_Reverse,   MVT::v2i64,  1 }, // pshufd
> -    { TTI::SK_Reverse,   MVT::v4i32,  1 }, // pshufd
> -    { TTI::SK_Reverse,   MVT::v8i16,  3 }, // pshuflw + pshufhw + pshufd
> -    { TTI::SK_Reverse,   MVT::v16i8,  9 }, // 2*pshuflw + 2*pshufhw
> -                                           // + 2*pshufd + 2*unpck +
> packus
> -
> -    { TTI::SK_Select,    MVT::v2i64,  1 }, // movsd
> -    { TTI::SK_Select,    MVT::v2f64,  1 }, // movsd
> -    { TTI::SK_Select,    MVT::v4i32,  2 }, // 2*shufps
> -    { TTI::SK_Select,    MVT::v8i16,  3 }, // pand + pandn + por
> -    { TTI::SK_Select,    MVT::v16i8,  3 }, // pand + pandn + por
> -
> -    { TTI::SK_PermuteSingleSrc, MVT::v2f64,  1 }, // shufpd
> -    { TTI::SK_PermuteSingleSrc, MVT::v2i64,  1 }, // pshufd
> -    { TTI::SK_PermuteSingleSrc, MVT::v4i32,  1 }, // pshufd
> -    { TTI::SK_PermuteSingleSrc, MVT::v8i16,  5 }, // 2*pshuflw + 2*pshufhw
> +      {TTI::SK_Broadcast, MVT::v2f64, 1}, // shufpd
> +      {TTI::SK_Broadcast, MVT::v2i64, 1}, // pshufd
> +      {TTI::SK_Broadcast, MVT::v4i32, 1}, // pshufd
> +      {TTI::SK_Broadcast, MVT::v8i16, 2}, // pshuflw + pshufd
> +      {TTI::SK_Broadcast, MVT::v16i8, 3}, // unpck + pshuflw + pshufd
> +
> +      {TTI::SK_Reverse, MVT::v2f64, 1}, // shufpd
> +      {TTI::SK_Reverse, MVT::v2i64, 1}, // pshufd
> +      {TTI::SK_Reverse, MVT::v4i32, 1}, // pshufd
> +      {TTI::SK_Reverse, MVT::v8i16, 3}, // pshuflw + pshufhw + pshufd
> +      {TTI::SK_Reverse, MVT::v16i8, 9}, // 2*pshuflw + 2*pshufhw
> +                                        // + 2*pshufd + 2*unpck + packus
> +
> +      {TTI::SK_Select, MVT::v2i64, 1}, // movsd
> +      {TTI::SK_Select, MVT::v2f64, 1}, // movsd
> +      {TTI::SK_Select, MVT::v4i32, 2}, // 2*shufps
> +      {TTI::SK_Select, MVT::v8i16, 3}, // pand + pandn + por
> +      {TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por
> +
> +      {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd
> +      {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // pshufd
> +      {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd
> +      {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, // 2*pshuflw + 2*pshufhw
>                                                    // + pshufd/unpck
>      { TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw
>                                                    // + 2*pshufd + 2*unpck
> + 2*packus
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll Fri Nov  9
> 11:04:27 2018
> @@ -12,17 +12,17 @@ define i32 @reduce_i64(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i64'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i64'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 21 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i64'
> @@ -46,7 +46,7 @@ define i32 @reduce_i64(i32 %arg) {
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
> -; AVX512-NEXT:  Cost Model: Found an estimated cost of 17 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>    %V1  = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x
> i64> undef)
> @@ -61,17 +61,17 @@ define i32 @reduce_i32(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i32'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i32'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 25 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i32'
> @@ -95,7 +95,7 @@ define i32 @reduce_i32(i32 %arg) {
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
> -; AVX512-NEXT:  Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>    %V2  = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x
> i32> undef)
> @@ -110,17 +110,17 @@ define i32 @reduce_i16(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i16'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i16'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i16'
> @@ -135,24 +135,24 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 64 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 90 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 88 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i16'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i16'
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i16'
> @@ -160,15 +160,15 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 45 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i16'
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 3 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V4  = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x
> i16> undef)
> @@ -183,49 +183,49 @@ define i32 @reduce_i8(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i8'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i8'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i8'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i8'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 92 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 101 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 118 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 100 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 116 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i8'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i8'
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8
> x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i8'
> @@ -233,15 +233,15 @@ define i32 @reduce_i8(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 52 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 118 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 121 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i8'
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8
> x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V8   = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>
> undef)
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll Fri Nov  9
> 11:04:27 2018
> @@ -12,25 +12,25 @@ define i32 @reduce_i64(i32 %arg) {
>  ; SSE-LABEL: 'reduce_i64'
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i64'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i64'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512-LABEL: 'reduce_i64'
> @@ -38,7 +38,7 @@ define i32 @reduce_i64(i32 %arg) {
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef)
> -; AVX512-NEXT:  Cost Model: Found an estimated cost of 17 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>    %V1  = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x
> i64> undef)
> @@ -53,25 +53,25 @@ define i32 @reduce_i32(i32 %arg) {
>  ; SSE-LABEL: 'reduce_i32'
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i32'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i32'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512-LABEL: 'reduce_i32'
> @@ -79,7 +79,7 @@ define i32 @reduce_i32(i32 %arg) {
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i32
> @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef)
> -; AVX512-NEXT:  Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>    %V2  = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x
> i32> undef)
> @@ -94,49 +94,49 @@ define i32 @reduce_i16(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i16'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i16'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i16'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i16'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 55 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 58 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i16'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i16'
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i16'
> @@ -144,15 +144,15 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 45 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i16'
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V4  = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x
> i16> undef)
> @@ -167,49 +167,49 @@ define i32 @reduce_i8(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i8'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i8'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i8'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i8'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 85 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 79 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 83 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i8'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i8'
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8
> x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i8'
> @@ -217,15 +217,15 @@ define i32 @reduce_i8(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 52 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 118 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 121 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i8'
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8
> x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V8   = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8>
> undef)
> @@ -243,9 +243,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i1'
> @@ -254,9 +254,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i1'
> @@ -265,9 +265,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i1'
> @@ -277,8 +277,8 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 85 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 79 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 83 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i1'
> @@ -288,8 +288,8 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i1'
> @@ -298,9 +298,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4
> x i1> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8
> x i1> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 151 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 156 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 165 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 150 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 154 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 162 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i1'
> @@ -311,7 +311,7 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 357 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 838 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 841 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 840 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i1'
> @@ -320,9 +320,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4
> x i1> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8
> x i1> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 151 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 156 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 165 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 150 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 154 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 162 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V1   = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1>
> undef)
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll Fri Nov  9
> 11:04:27 2018
> @@ -12,25 +12,25 @@ define i32 @reduce_i64(i32 %arg) {
>  ; SSE-LABEL: 'reduce_i64'
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 126 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 59 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 123 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i64'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 81 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 154 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 152 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i64'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 70 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i64'
> @@ -38,7 +38,7 @@ define i32 @reduce_i64(i32 %arg) {
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for
> instruction: %V2 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 35 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 52 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 51 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i64'
> @@ -46,7 +46,7 @@ define i32 @reduce_i64(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for
> instruction: %V2 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 35 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 52 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 51 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i64'
> @@ -54,7 +54,7 @@ define i32 @reduce_i64(i32 %arg) {
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for
> instruction: %V2 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V1  = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x
> i64> undef)
> @@ -69,41 +69,41 @@ define i32 @reduce_i32(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i32'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 105 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 102 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i32'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 56 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 105 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 54 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 102 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i32'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i32'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 58 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 56 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i32'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i32'
> @@ -111,7 +111,7 @@ define i32 @reduce_i32(i32 %arg) {
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i32'
> @@ -119,7 +119,7 @@ define i32 @reduce_i32(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i32'
> @@ -127,7 +127,7 @@ define i32 @reduce_i32(i32 %arg) {
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V2  = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x
> i32> undef)
> @@ -142,49 +142,49 @@ define i32 @reduce_i16(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i16'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i16'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i16'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i16'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 64 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 90 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 88 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i16'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i16'
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i16'
> @@ -192,15 +192,15 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 45 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i16'
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V4  = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x
> i16> undef)
> @@ -215,49 +215,49 @@ define i32 @reduce_i8(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i8'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 104 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 129 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 178 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 275 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 176 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 272 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i8'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 68 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 93 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 142 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 239 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 92 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 140 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 236 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i8'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 68 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 93 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 142 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 239 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 92 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 140 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 236 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i8'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 68 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 202 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 255 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 360 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 254 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 358 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i8'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 137 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 241 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 171 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 239 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i8'
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8
> x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 117 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 144 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 197 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 143 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 195 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i8'
> @@ -265,15 +265,15 @@ define i32 @reduce_i8(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 67 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 178 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 201 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 200 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i8'
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8
> x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 40 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 117 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 144 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 197 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 143 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 195 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V8   = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8>
> undef)
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll Fri Nov  9
> 11:04:27 2018
> @@ -12,25 +12,25 @@ define i32 @reduce_i64(i32 %arg) {
>  ; SSE-LABEL: 'reduce_i64'
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i64'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i64'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512-LABEL: 'reduce_i64'
> @@ -38,7 +38,7 @@ define i32 @reduce_i64(i32 %arg) {
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef)
> -; AVX512-NEXT:  Cost Model: Found an estimated cost of 17 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>    %V1  = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64>
> undef)
> @@ -53,25 +53,25 @@ define i32 @reduce_i32(i32 %arg) {
>  ; SSE-LABEL: 'reduce_i32'
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i32'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i32'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512-LABEL: 'reduce_i32'
> @@ -79,7 +79,7 @@ define i32 @reduce_i32(i32 %arg) {
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i32
> @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef)
> -; AVX512-NEXT:  Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>    %V2  = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>
> undef)
> @@ -94,49 +94,49 @@ define i32 @reduce_i16(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i16'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i16'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i16'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i16'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 55 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 58 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i16'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i16'
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i16'
> @@ -144,15 +144,15 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 45 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i16'
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V4  = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>
> undef)
> @@ -167,49 +167,49 @@ define i32 @reduce_i8(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i8'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i8'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i8'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i8'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 85 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 79 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 83 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i8'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i8'
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x
> i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i8'
> @@ -217,15 +217,15 @@ define i32 @reduce_i8(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 52 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 118 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 121 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i8'
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x
> i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V8   = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8>
> undef)
> @@ -243,9 +243,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i1'
> @@ -254,9 +254,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i1'
> @@ -265,9 +265,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i1'
> @@ -277,8 +277,8 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 85 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 79 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 83 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i1'
> @@ -288,8 +288,8 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i1'
> @@ -298,9 +298,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x
> i1> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x
> i1> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 151 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 156 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 165 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 150 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 154 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 162 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i1'
> @@ -311,7 +311,7 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 357 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 838 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 841 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 840 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i1'
> @@ -320,9 +320,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x
> i1> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x
> i1> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 151 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 156 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 165 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 150 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 154 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 162 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V1   = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1>
> undef)
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll Fri Nov  9
> 11:04:27 2018
> @@ -12,17 +12,17 @@ define i32 @reduce_i64(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i64'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 146 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 143 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i64'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 146 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 143 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i64'
> @@ -37,8 +37,8 @@ define i32 @reduce_i64(i32 %arg) {
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 59 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 37 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 57 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i64'
> @@ -69,17 +69,17 @@ define i32 @reduce_i32(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i32'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i32'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i32'
> @@ -126,17 +126,17 @@ define i32 @reduce_i16(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i16'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 74 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i16'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 39 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 46 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 62 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i16'
> @@ -151,8 +151,8 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 101 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 112 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 133 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 111 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 131 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i16'
> @@ -176,7 +176,7 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 112 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 117 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 116 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i16'
> @@ -199,33 +199,33 @@ define i32 @reduce_i8(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i8'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 97 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 102 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 111 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 101 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 109 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 125 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i8'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i8'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i8'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 162 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 173 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 194 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 172 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 192 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i8'
> @@ -249,7 +249,7 @@ define i32 @reduce_i8(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 61 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 253 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 258 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 257 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i8'
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll Fri Nov  9
> 11:04:27 2018
> @@ -12,17 +12,17 @@ define i32 @reduce_i64(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i64'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 146 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 143 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i64'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 146 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 143 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i64'
> @@ -37,8 +37,8 @@ define i32 @reduce_i64(i32 %arg) {
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 59 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 37 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 57 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i64'
> @@ -69,17 +69,17 @@ define i32 @reduce_i32(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i32'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i32'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i32'
> @@ -126,17 +126,17 @@ define i32 @reduce_i16(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i16'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 74 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i16'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 39 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 46 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 62 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i16'
> @@ -151,8 +151,8 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 101 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 112 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 133 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 111 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 131 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i16'
> @@ -176,7 +176,7 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 112 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 117 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 116 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i16'
> @@ -199,33 +199,33 @@ define i32 @reduce_i8(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i8'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 97 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 102 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 111 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 101 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 109 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 125 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i8'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i8'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i8'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 162 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 173 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 194 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 172 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 192 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i8'
> @@ -249,7 +249,7 @@ define i32 @reduce_i8(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 61 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 253 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 258 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 257 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i8'
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll Fri Nov  9
> 11:04:27 2018
> @@ -12,17 +12,17 @@ define i32 @reduce_i64(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i64'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 146 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 143 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i64'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 146 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 143 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i64'
> @@ -37,8 +37,8 @@ define i32 @reduce_i64(i32 %arg) {
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 59 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 37 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 57 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i64'
> @@ -69,17 +69,17 @@ define i32 @reduce_i32(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i32'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i32'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i32'
> @@ -126,17 +126,17 @@ define i32 @reduce_i16(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i16'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 74 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i16'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 39 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 46 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 62 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i16'
> @@ -151,8 +151,8 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 101 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 112 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 133 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 111 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 131 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i16'
> @@ -176,7 +176,7 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 112 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 117 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 116 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i16'
> @@ -199,33 +199,33 @@ define i32 @reduce_i8(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i8'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 97 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 102 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 111 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 101 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 109 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 125 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i8'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i8'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i8'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 162 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 173 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 194 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 172 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 192 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i8'
> @@ -249,7 +249,7 @@ define i32 @reduce_i8(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 61 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 253 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 258 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 257 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i8'
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll Fri Nov  9
> 11:04:27 2018
> @@ -12,17 +12,17 @@ define i32 @reduce_i64(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i64'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 146 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 143 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i64'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 146 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 143 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i64'
> @@ -37,8 +37,8 @@ define i32 @reduce_i64(i32 %arg) {
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 59 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 37 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 57 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i64'
> @@ -69,17 +69,17 @@ define i32 @reduce_i32(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i32'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i32'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i32'
> @@ -126,17 +126,17 @@ define i32 @reduce_i16(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i16'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 74 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i16'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 39 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 46 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 62 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i16'
> @@ -151,8 +151,8 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 101 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 112 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 133 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 111 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 131 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i16'
> @@ -176,7 +176,7 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 112 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 117 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 116 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i16'
> @@ -199,33 +199,33 @@ define i32 @reduce_i8(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i8'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 97 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 102 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 111 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 101 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 109 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 125 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i8'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i8'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i8'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 162 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 173 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 194 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 172 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 192 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i8'
> @@ -249,7 +249,7 @@ define i32 @reduce_i8(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 61 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 253 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 258 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 257 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i8'
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll Fri Nov  9
> 11:04:27 2018
> @@ -12,25 +12,25 @@ define i32 @reduce_i64(i32 %arg) {
>  ; SSE-LABEL: 'reduce_i64'
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i64'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i64'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512-LABEL: 'reduce_i64'
> @@ -38,7 +38,7 @@ define i32 @reduce_i64(i32 %arg) {
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef)
> -; AVX512-NEXT:  Cost Model: Found an estimated cost of 17 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 16 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>    %V1  = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x
> i64> undef)
> @@ -53,25 +53,25 @@ define i32 @reduce_i32(i32 %arg) {
>  ; SSE-LABEL: 'reduce_i32'
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>
> undef)
> -; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>
> undef)
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>
> undef)
>  ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i32'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i32'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512-LABEL: 'reduce_i32'
> @@ -79,7 +79,7 @@ define i32 @reduce_i32(i32 %arg) {
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>
> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i32
> @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef)
> -; AVX512-NEXT:  Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
>  ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>    %V2  = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x
> i32> undef)
> @@ -94,49 +94,49 @@ define i32 @reduce_i16(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i16'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i16'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i16'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i16'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 55 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 58 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i16'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i16'
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i16'
> @@ -144,15 +144,15 @@ define i32 @reduce_i16(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 45 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i16'
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V4  = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x
> i16> undef)
> @@ -167,49 +167,49 @@ define i32 @reduce_i8(i32 %arg) {
>  ; SSE2-LABEL: 'reduce_i8'
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i8'
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i8'
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i8'
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 85 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 79 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 83 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i8'
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i8'
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8
> x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i8'
> @@ -217,15 +217,15 @@ define i32 @reduce_i8(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 52 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 118 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 121 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i8'
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8
> x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V8   = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8>
> undef)
> @@ -243,9 +243,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSSE3-LABEL: 'reduce_i1'
> @@ -254,9 +254,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; SSE42-LABEL: 'reduce_i1'
> @@ -265,9 +265,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> -; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> +; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
>  ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX1-LABEL: 'reduce_i1'
> @@ -277,8 +277,8 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 77 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> -; AVX1-NEXT:  Cost Model: Found an estimated cost of 85 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 79 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> +; AVX1-NEXT:  Cost Model: Found an estimated cost of 83 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
>  ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX2-LABEL: 'reduce_i1'
> @@ -288,8 +288,8 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> -; AVX2-NEXT:  Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> +; AVX2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
>  ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
>  ;
>  ; AVX512F-LABEL: 'reduce_i1'
> @@ -298,9 +298,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4
> x i1> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8
> x i1> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 151 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 156 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
> -; AVX512F-NEXT:  Cost Model: Found an estimated cost of 165 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 150 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 154 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
> +; AVX512F-NEXT:  Cost Model: Found an estimated cost of 162 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
>  ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512BW-LABEL: 'reduce_i1'
> @@ -311,7 +311,7 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 357 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 838 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
> -; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 841 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
> +; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 840 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
>  ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>  ; AVX512DQ-LABEL: 'reduce_i1'
> @@ -320,9 +320,9 @@ define i32 @reduce_i1(i32 %arg) {
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4
> x i1> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 59 for
> instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8
> x i1> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 151 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 156 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
> -; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 165 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 150 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 154 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
> +; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 162 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
>  ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
>  ;
>    %V1   = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1>
> undef)
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduction.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduction.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduction.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduction.ll Fri Nov  9
> 11:04:27 2018
> @@ -59,7 +59,7 @@ define fastcc i32 @reduction_cost_int(<8
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %r = extractelement <8 x i32> %bin.rdx.3, i32 0
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %r = extractelement <8 x i32> %bin.rdx.3, i32 0
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 %r
>  ;
>  ; SSSE3-LABEL: 'reduction_cost_int'
> @@ -69,7 +69,7 @@ define fastcc i32 @reduction_cost_int(<8
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %r = extractelement <8 x i32> %bin.rdx.3, i32 0
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %r = extractelement <8 x i32> %bin.rdx.3, i32 0
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 %r
>  ;
>  ; SSE42-LABEL: 'reduction_cost_int'
> @@ -376,7 +376,7 @@ define fastcc double @no_pairwise_reduct
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret double %r
>  ;
>  ; SSSE3-LABEL: 'no_pairwise_reduction4double'
> @@ -384,7 +384,7 @@ define fastcc double @no_pairwise_reduct
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret double %r
>  ;
>  ; SSE42-LABEL: 'no_pairwise_reduction4double'
> @@ -428,7 +428,7 @@ define fastcc float @no_pairwise_reducti
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %r = extractelement <8 x float> %bin.rdx8, i32 0
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:
> %r = extractelement <8 x float> %bin.rdx8, i32 0
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret float %r
>  ;
>  ; SSSE3-LABEL: 'no_pairwise_reduction8float'
> @@ -438,7 +438,7 @@ define fastcc float @no_pairwise_reducti
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %r = extractelement <8 x float> %bin.rdx8, i32 0
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:
> %r = extractelement <8 x float> %bin.rdx8, i32 0
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret float %r
>  ;
>  ; SSE42-LABEL: 'no_pairwise_reduction8float'
> @@ -562,7 +562,7 @@ define fastcc i64 @no_pairwise_reduction
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx = add <4 x i64> %rdx, %rdx.shuf
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32>
> <i32 1, i32 undef, i32 undef, i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i64 %r
>  ;
>  ; SSSE3-LABEL: 'no_pairwise_reduction4i64'
> @@ -570,7 +570,7 @@ define fastcc i64 @no_pairwise_reduction
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx = add <4 x i64> %rdx, %rdx.shuf
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32>
> <i32 1, i32 undef, i32 undef, i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i64 %r
>  ;
>  ; SSE42-LABEL: 'no_pairwise_reduction4i64'
> @@ -666,7 +666,7 @@ define fastcc i32 @no_pairwise_reduction
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32>
> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %r = extractelement <8 x i32> %bin.rdx8, i32 0
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %r = extractelement <8 x i32> %bin.rdx8, i32 0
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 %r
>  ;
>  ; SSSE3-LABEL: 'no_pairwise_reduction8i32'
> @@ -676,7 +676,7 @@ define fastcc i32 @no_pairwise_reduction
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32>
> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %r = extractelement <8 x i32> %bin.rdx8, i32 0
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:
> %r = extractelement <8 x i32> %bin.rdx8, i32 0
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 %r
>  ;
>  ; SSE42-LABEL: 'no_pairwise_reduction8i32'
> @@ -817,7 +817,7 @@ define fastcc double @pairwise_reduction
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4
> x i32> <i32 0, i32 undef, i32 undef, i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4
> x i32> <i32 1, i32 undef, i32 undef, i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret double %r
>  ;
>  ; SSSE3-LABEL: 'pairwise_reduction4double'
> @@ -827,7 +827,7 @@ define fastcc double @pairwise_reduction
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4
> x i32> <i32 0, i32 undef, i32 undef, i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4
> x i32> <i32 1, i32 undef, i32 undef, i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret double %r
>  ;
>  ; SSE42-LABEL: 'pairwise_reduction4double'
> @@ -882,7 +882,7 @@ define fastcc float @pairwise_reduction8
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8
> x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8
> x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %r = extractelement <8 x float> %bin.rdx9, i32 0
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction:
> %r = extractelement <8 x float> %bin.rdx9, i32 0
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret float %r
>  ;
>  ; SSSE3-LABEL: 'pairwise_reduction8float'
> @@ -895,7 +895,7 @@ define fastcc float @pairwise_reduction8
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8
> x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8
> x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction:
> %r = extractelement <8 x float> %bin.rdx9, i32 0
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction:
> %r = extractelement <8 x float> %bin.rdx9, i32 0
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret float %r
>  ;
>  ; SSE42-LABEL: 'pairwise_reduction8float'
> @@ -1048,7 +1048,7 @@ define fastcc i64 @pairwise_reduction4i6
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x
> i32> <i32 0, i32 undef, i32 undef, i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i64 %r
>  ;
>  ; SSSE3-LABEL: 'pairwise_reduction4i64'
> @@ -1058,7 +1058,7 @@ define fastcc i64 @pairwise_reduction4i6
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x
> i32> <i32 0, i32 undef, i32 undef, i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i64 %r
>  ;
>  ; SSE42-LABEL: 'pairwise_reduction4i64'
> @@ -1180,7 +1180,7 @@ define fastcc i32 @pairwise_reduction8i3
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x
> i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1
> -; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %r = extractelement <8 x i32> %bin.rdx9, i32 0
> +; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction:
> %r = extractelement <8 x i32> %bin.rdx9, i32 0
>  ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 %r
>  ;
>  ; SSSE3-LABEL: 'pairwise_reduction8i32'
> @@ -1193,7 +1193,7 @@ define fastcc i32 @pairwise_reduction8i3
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x
> i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1
> -; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction:
> %r = extractelement <8 x i32> %bin.rdx9, i32 0
> +; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction:
> %r = extractelement <8 x i32> %bin.rdx9, i32 0
>  ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 %r
>  ;
>  ; SSE42-LABEL: 'pairwise_reduction8i32'
>
> Modified:
> llvm/trunk/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
> (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
> Fri Nov  9 11:04:27 2018
> @@ -17,28 +17,52 @@
>  ;
>
>  define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) {
> -; CHECK-LABEL: 'test_vXf64'
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 2, i32 3>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 0, i32 1>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 2, i32 3>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 4, i32 5>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 6, i32 7>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 2, i32 3, i32 4, i32 5>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> +; SSE-LABEL: 'test_vXf64'
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 2, i32 3>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 2, i32 3>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 4, i32 5>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 6, i32 7>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 2, i32 3, i32 4, i32 5>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> +;
> +; AVX-LABEL: 'test_vXf64'
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 2, i32 3>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 2, i32 3>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 4, i32 5>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 6, i32 7>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 2, i32 3, i32 4, i32 5>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> +;
> +; AVX512-LABEL: 'test_vXf64'
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 2, i32 3>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 2, i32 3>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 4, i32 5>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 6, i32 7>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 2, i32 3, i32 4, i32 5>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret void
>  ;
>  ; BTVER2-LABEL: 'test_vXf64'
> -; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
>  ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 2, i32 3>
> -; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 0, i32 1>
>  ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 2, i32 3>
> -; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 4, i32 5>
> +; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 4, i32 5>
>  ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 6, i32 7>
> -; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> +; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
>  ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 2, i32 3, i32 4, i32 5>
> -; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> +; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
>  ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret void
>  ;
>    %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> @@ -54,28 +78,52 @@ define void @test_vXf64(<4 x double> %sr
>  }
>
>  define void @test_vXfi64(<4 x i64> %src256, <8 x i64> %src512) {
> -; CHECK-LABEL: 'test_vXfi64'
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 4, i32 5>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 6, i32 7>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 2, i32 3, i32 4, i32 5>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> -; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> +; SSE-LABEL: 'test_vXfi64'
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 4, i32 5>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 6, i32 7>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 2, i32 3, i32 4, i32 5>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> +; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> +;
> +; AVX-LABEL: 'test_vXfi64'
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 4, i32 5>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 6, i32 7>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 2, i32 3, i32 4, i32 5>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> +; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> +;
> +; AVX512-LABEL: 'test_vXfi64'
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 4, i32 5>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 6, i32 7>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 2, i32 3, i32 4, i32 5>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> +; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret void
>  ;
>  ; BTVER2-LABEL: 'test_vXfi64'
> -; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 0, i32 1>
>  ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> -; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 0, i32 1>
>  ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> -; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 4, i32 5>
> +; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 4, i32 5>
>  ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 6, i32 7>
> -; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> +; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
>  ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 2, i32 3, i32 4, i32 5>
> -; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> +; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
>  ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret void
>  ;
>    %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32>
> <i32 0, i32 1>
>
> Modified:
> llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll Fri
> Nov  9 11:04:27 2018
> @@ -13,7 +13,7 @@
>  ; Vector cost is 5, Scalar cost is 7
>  ; CHECK: Adding cost -2 for reduction that starts with   %7 = load i32,
> i32* %arrayidx.7, align 4 (It is a splitting reduction)
>  ; Vector cost is 11, Scalar cost is 7
> -; SSE2:  Adding cost 4 for reduction that starts with   %7 = load i32,
> i32* %arrayidx.7, align 4 (It is a splitting reduction)
> +; SSE2:  Adding cost 3 for reduction that starts with   %7 = load i32,
> i32* %arrayidx.7, align 4 (It is a splitting reduction)
>  define i32 @test_add(i32* nocapture readonly %p) {
>  ; CHECK-LABEL: @test_add(
>  ; CHECK-NEXT:  entry:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181109/1c50a087/attachment-0001.html>


More information about the llvm-commits mailing list