[llvm] r346538 - [CostModel][X86] SK_ExtractSubvector is free if the subvector is at the start of the source vector
George Burgess IV via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 9 19:30:16 PST 2018
Happy Friday (night)!
It looks like this change either causes or unmasks a bug
(http://llvm.org/pr39615) in LLVM. I don't know our vector or machine bits
well, so can you please help look into this? :)
Thank you,
George
On Fri, Nov 9, 2018 at 11:06 AM Simon Pilgrim via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: rksimon
> Date: Fri Nov 9 11:04:27 2018
> New Revision: 346538
>
> URL: http://llvm.org/viewvc/llvm-project?rev=346538&view=rev
> Log:
> [CostModel][X86] SK_ExtractSubvector is free if the subvector is at the
> start of the source vector
>
> Modified:
> llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
> llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll
> llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll
> llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll
> llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll
> llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll
> llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll
> llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll
> llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll
> llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll
> llvm/trunk/test/Analysis/CostModel/X86/reduction.ll
> llvm/trunk/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
> llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Fri Nov 9
> 11:04:27 2018
> @@ -872,6 +872,12 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
> if (Kind == TTI::SK_Broadcast)
> LT.first = 1;
>
> + // Subvector extractions are free if they start at beginning of the
> + // vector.
> + if (Kind == TTI::SK_ExtractSubvector &&
> + ((Index % LT.second.getVectorNumElements()) == 0))
> + return 0;
> +
> // We are going to permute multiple sources and the result will be in
> multiple
> // destinations. Providing an accurate cost only for splits where the
> element
> // type remains the same.
> @@ -909,15 +915,15 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
> }
>
> static const CostTblEntry AVX512VBMIShuffleTbl[] = {
> - { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
> - { TTI::SK_Reverse, MVT::v32i8, 1 }, // vpermb
> + {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
> + {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb
>
> - { TTI::SK_PermuteSingleSrc, MVT::v64i8, 1 }, // vpermb
> - { TTI::SK_PermuteSingleSrc, MVT::v32i8, 1 }, // vpermb
> + {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, // vpermb
> + {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpermb
>
> - { TTI::SK_PermuteTwoSrc, MVT::v64i8, 1 }, // vpermt2b
> - { TTI::SK_PermuteTwoSrc, MVT::v32i8, 1 }, // vpermt2b
> - { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 } // vpermt2b
> + {TTI::SK_PermuteTwoSrc, MVT::v64i8, 1}, // vpermt2b
> + {TTI::SK_PermuteTwoSrc, MVT::v32i8, 1}, // vpermt2b
> + {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1} // vpermt2b
> };
>
> if (ST->hasVBMI())
> @@ -926,25 +932,25 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
> return LT.first * Entry->Cost;
>
> static const CostTblEntry AVX512BWShuffleTbl[] = {
> - { TTI::SK_Broadcast, MVT::v32i16, 1 }, // vpbroadcastw
> - { TTI::SK_Broadcast, MVT::v64i8, 1 }, // vpbroadcastb
> + {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
> + {TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb
>
> - { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
> - { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
> - { TTI::SK_Reverse, MVT::v64i8, 2 }, // pshufb + vshufi64x2
> -
> - { TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
> - { TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw
> - { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // vpermw
> - { TTI::SK_PermuteSingleSrc, MVT::v64i8, 8 }, // extend to v32i16
> - { TTI::SK_PermuteSingleSrc, MVT::v32i8, 3 }, // vpermw + zext/trunc
> -
> - { TTI::SK_PermuteTwoSrc, MVT::v32i16, 1 }, // vpermt2w
> - { TTI::SK_PermuteTwoSrc, MVT::v16i16, 1 }, // vpermt2w
> - { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpermt2w
> - { TTI::SK_PermuteTwoSrc, MVT::v32i8, 3 }, // zext + vpermt2w +
> trunc
> - { TTI::SK_PermuteTwoSrc, MVT::v64i8, 19 }, // 6 * v32i8 + 1
> - { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 } // zext + vpermt2w +
> trunc
> + {TTI::SK_Reverse, MVT::v32i16, 1}, // vpermw
> + {TTI::SK_Reverse, MVT::v16i16, 1}, // vpermw
> + {TTI::SK_Reverse, MVT::v64i8, 2}, // pshufb + vshufi64x2
> +
> + {TTI::SK_PermuteSingleSrc, MVT::v32i16, 1}, // vpermw
> + {TTI::SK_PermuteSingleSrc, MVT::v16i16, 1}, // vpermw
> + {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // vpermw
> + {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, // extend to v32i16
> + {TTI::SK_PermuteSingleSrc, MVT::v32i8, 3}, // vpermw + zext/trunc
> +
> + {TTI::SK_PermuteTwoSrc, MVT::v32i16, 1}, // vpermt2w
> + {TTI::SK_PermuteTwoSrc, MVT::v16i16, 1}, // vpermt2w
> + {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, // vpermt2w
> + {TTI::SK_PermuteTwoSrc, MVT::v32i8, 3}, // zext + vpermt2w + trunc
> + {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1
> + {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3} // zext + vpermt2w + trunc
> };
>
> if (ST->hasBWI())
> @@ -953,42 +959,42 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
> return LT.first * Entry->Cost;
>
> static const CostTblEntry AVX512ShuffleTbl[] = {
> - { TTI::SK_Broadcast, MVT::v8f64, 1 }, // vbroadcastpd
> - { TTI::SK_Broadcast, MVT::v16f32, 1 }, // vbroadcastps
> - { TTI::SK_Broadcast, MVT::v8i64, 1 }, // vpbroadcastq
> - { TTI::SK_Broadcast, MVT::v16i32, 1 }, // vpbroadcastd
> -
> - { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
> - { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
> - { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
> - { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
> -
> - { TTI::SK_PermuteSingleSrc, MVT::v8f64, 1 }, // vpermpd
> - { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
> - { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // vpermpd
> - { TTI::SK_PermuteSingleSrc, MVT::v16f32, 1 }, // vpermps
> - { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
> - { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // vpermps
> - { TTI::SK_PermuteSingleSrc, MVT::v8i64, 1 }, // vpermq
> - { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
> - { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // vpermq
> - { TTI::SK_PermuteSingleSrc, MVT::v16i32, 1 }, // vpermd
> - { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
> - { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // vpermd
> - { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
> -
> - { TTI::SK_PermuteTwoSrc, MVT::v8f64, 1 }, // vpermt2pd
> - { TTI::SK_PermuteTwoSrc, MVT::v16f32, 1 }, // vpermt2ps
> - { TTI::SK_PermuteTwoSrc, MVT::v8i64, 1 }, // vpermt2q
> - { TTI::SK_PermuteTwoSrc, MVT::v16i32, 1 }, // vpermt2d
> - { TTI::SK_PermuteTwoSrc, MVT::v4f64, 1 }, // vpermt2pd
> - { TTI::SK_PermuteTwoSrc, MVT::v8f32, 1 }, // vpermt2ps
> - { TTI::SK_PermuteTwoSrc, MVT::v4i64, 1 }, // vpermt2q
> - { TTI::SK_PermuteTwoSrc, MVT::v8i32, 1 }, // vpermt2d
> - { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, // vpermt2pd
> - { TTI::SK_PermuteTwoSrc, MVT::v4f32, 1 }, // vpermt2ps
> - { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // vpermt2q
> - { TTI::SK_PermuteTwoSrc, MVT::v4i32, 1 } // vpermt2d
> + {TTI::SK_Broadcast, MVT::v8f64, 1}, // vbroadcastpd
> + {TTI::SK_Broadcast, MVT::v16f32, 1}, // vbroadcastps
> + {TTI::SK_Broadcast, MVT::v8i64, 1}, // vpbroadcastq
> + {TTI::SK_Broadcast, MVT::v16i32, 1}, // vpbroadcastd
> +
> + {TTI::SK_Reverse, MVT::v8f64, 1}, // vpermpd
> + {TTI::SK_Reverse, MVT::v16f32, 1}, // vpermps
> + {TTI::SK_Reverse, MVT::v8i64, 1}, // vpermq
> + {TTI::SK_Reverse, MVT::v16i32, 1}, // vpermd
> +
> + {TTI::SK_PermuteSingleSrc, MVT::v8f64, 1}, // vpermpd
> + {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd
> + {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // vpermpd
> + {TTI::SK_PermuteSingleSrc, MVT::v16f32, 1}, // vpermps
> + {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, // vpermps
> + {TTI::SK_PermuteSingleSrc, MVT::v4f32, 1}, // vpermps
> + {TTI::SK_PermuteSingleSrc, MVT::v8i64, 1}, // vpermq
> + {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, // vpermq
> + {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // vpermq
> + {TTI::SK_PermuteSingleSrc, MVT::v16i32, 1}, // vpermd
> + {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermd
> + {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // vpermd
> + {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
> +
> + {TTI::SK_PermuteTwoSrc, MVT::v8f64, 1}, // vpermt2pd
> + {TTI::SK_PermuteTwoSrc, MVT::v16f32, 1}, // vpermt2ps
> + {TTI::SK_PermuteTwoSrc, MVT::v8i64, 1}, // vpermt2q
> + {TTI::SK_PermuteTwoSrc, MVT::v16i32, 1}, // vpermt2d
> + {TTI::SK_PermuteTwoSrc, MVT::v4f64, 1}, // vpermt2pd
> + {TTI::SK_PermuteTwoSrc, MVT::v8f32, 1}, // vpermt2ps
> + {TTI::SK_PermuteTwoSrc, MVT::v4i64, 1}, // vpermt2q
> + {TTI::SK_PermuteTwoSrc, MVT::v8i32, 1}, // vpermt2d
> + {TTI::SK_PermuteTwoSrc, MVT::v2f64, 1}, // vpermt2pd
> + {TTI::SK_PermuteTwoSrc, MVT::v4f32, 1}, // vpermt2ps
> + {TTI::SK_PermuteTwoSrc, MVT::v2i64, 1}, // vpermt2q
> + {TTI::SK_PermuteTwoSrc, MVT::v4i32, 1} // vpermt2d
> };
>
> if (ST->hasAVX512())
> @@ -996,40 +1002,40 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
> return LT.first * Entry->Cost;
>
> static const CostTblEntry AVX2ShuffleTbl[] = {
> - { TTI::SK_Broadcast, MVT::v4f64, 1 }, // vbroadcastpd
> - { TTI::SK_Broadcast, MVT::v8f32, 1 }, // vbroadcastps
> - { TTI::SK_Broadcast, MVT::v4i64, 1 }, // vpbroadcastq
> - { TTI::SK_Broadcast, MVT::v8i32, 1 }, // vpbroadcastd
> - { TTI::SK_Broadcast, MVT::v16i16, 1 }, // vpbroadcastw
> - { TTI::SK_Broadcast, MVT::v32i8, 1 }, // vpbroadcastb
> -
> - { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
> - { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
> - { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
> - { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
> - { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
> - { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
> -
> - { TTI::SK_Select, MVT::v16i16, 1 }, // vpblendvb
> - { TTI::SK_Select, MVT::v32i8, 1 }, // vpblendvb
> -
> - { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
> - { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
> - { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
> - { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
> - { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 +
> 2*vpshufb
> + {TTI::SK_Broadcast, MVT::v4f64, 1}, // vbroadcastpd
> + {TTI::SK_Broadcast, MVT::v8f32, 1}, // vbroadcastps
> + {TTI::SK_Broadcast, MVT::v4i64, 1}, // vpbroadcastq
> + {TTI::SK_Broadcast, MVT::v8i32, 1}, // vpbroadcastd
> + {TTI::SK_Broadcast, MVT::v16i16, 1}, // vpbroadcastw
> + {TTI::SK_Broadcast, MVT::v32i8, 1}, // vpbroadcastb
> +
> + {TTI::SK_Reverse, MVT::v4f64, 1}, // vpermpd
> + {TTI::SK_Reverse, MVT::v8f32, 1}, // vpermps
> + {TTI::SK_Reverse, MVT::v4i64, 1}, // vpermq
> + {TTI::SK_Reverse, MVT::v8i32, 1}, // vpermd
> + {TTI::SK_Reverse, MVT::v16i16, 2}, // vperm2i128 + pshufb
> + {TTI::SK_Reverse, MVT::v32i8, 2}, // vperm2i128 + pshufb
> +
> + {TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb
> + {TTI::SK_Select, MVT::v32i8, 1}, // vpblendvb
> +
> + {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd
> + {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, // vpermps
> + {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, // vpermq
> + {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermd
> + {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vperm2i128 +
> 2*vpshufb
> // + vpblendvb
> - { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 }, // vperm2i128 +
> 2*vpshufb
> + {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vperm2i128 +
> 2*vpshufb
> // + vpblendvb
>
> - { TTI::SK_PermuteTwoSrc, MVT::v4f64, 3 }, // 2*vpermpd + vblendpd
> - { TTI::SK_PermuteTwoSrc, MVT::v8f32, 3 }, // 2*vpermps + vblendps
> - { TTI::SK_PermuteTwoSrc, MVT::v4i64, 3 }, // 2*vpermq + vpblendd
> - { TTI::SK_PermuteTwoSrc, MVT::v8i32, 3 }, // 2*vpermd + vpblendd
> - { TTI::SK_PermuteTwoSrc, MVT::v16i16, 7 }, // 2*vperm2i128 +
> 4*vpshufb
> - // + vpblendvb
> - { TTI::SK_PermuteTwoSrc, MVT::v32i8, 7 }, // 2*vperm2i128 +
> 4*vpshufb
> - // + vpblendvb
> + {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, // 2*vpermpd + vblendpd
> + {TTI::SK_PermuteTwoSrc, MVT::v8f32, 3}, // 2*vpermps + vblendps
> + {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, // 2*vpermq + vpblendd
> + {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3}, // 2*vpermd + vpblendd
> + {TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, // 2*vperm2i128 + 4*vpshufb
> + // + vpblendvb
> + {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7}, // 2*vperm2i128 + 4*vpshufb
> + // + vpblendvb
> };
>
> if (ST->hasAVX2())
> @@ -1037,21 +1043,21 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
> return LT.first * Entry->Cost;
>
> static const CostTblEntry XOPShuffleTbl[] = {
> - { TTI::SK_PermuteSingleSrc, MVT::v4f64, 2 }, // vperm2f128 +
> vpermil2pd
> - { TTI::SK_PermuteSingleSrc, MVT::v8f32, 2 }, // vperm2f128 +
> vpermil2ps
> - { TTI::SK_PermuteSingleSrc, MVT::v4i64, 2 }, // vperm2f128 +
> vpermil2pd
> - { TTI::SK_PermuteSingleSrc, MVT::v8i32, 2 }, // vperm2f128 +
> vpermil2ps
> - { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vextractf128 +
> 2*vpperm
> - // + vinsertf128
> - { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 }, // vextractf128 +
> 2*vpperm
> - // + vinsertf128
> -
> - { TTI::SK_PermuteTwoSrc, MVT::v16i16, 9 }, // 2*vextractf128 +
> 6*vpperm
> - // + vinsertf128
> - { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpperm
> - { TTI::SK_PermuteTwoSrc, MVT::v32i8, 9 }, // 2*vextractf128 +
> 6*vpperm
> - // + vinsertf128
> - { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 }, // vpperm
> + {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 +
> vpermil2pd
> + {TTI::SK_PermuteSingleSrc, MVT::v8f32, 2}, // vperm2f128 +
> vpermil2ps
> + {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, // vperm2f128 +
> vpermil2pd
> + {TTI::SK_PermuteSingleSrc, MVT::v8i32, 2}, // vperm2f128 +
> vpermil2ps
> + {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vextractf128 +
> 2*vpperm
> + // + vinsertf128
> + {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vextractf128 +
> 2*vpperm
> + // + vinsertf128
> +
> + {TTI::SK_PermuteTwoSrc, MVT::v16i16, 9}, // 2*vextractf128 +
> 6*vpperm
> + // + vinsertf128
> + {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, // vpperm
> + {TTI::SK_PermuteTwoSrc, MVT::v32i8, 9}, // 2*vextractf128 +
> 6*vpperm
> + // + vinsertf128
> + {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1}, // vpperm
> };
>
> if (ST->hasXOP())
> @@ -1059,46 +1065,46 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
> return LT.first * Entry->Cost;
>
> static const CostTblEntry AVX1ShuffleTbl[] = {
> - { TTI::SK_Broadcast, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
> - { TTI::SK_Broadcast, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
> - { TTI::SK_Broadcast, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
> - { TTI::SK_Broadcast, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
> - { TTI::SK_Broadcast, MVT::v16i16, 3 }, // vpshuflw + vpshufd +
> vinsertf128
> - { TTI::SK_Broadcast, MVT::v32i8, 2 }, // vpshufb + vinsertf128
> -
> - { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
> - { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
> - { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
> - { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
> - { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
> - // + vinsertf128
> - { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
> - // + vinsertf128
> -
> - { TTI::SK_Select, MVT::v4i64, 1 }, // vblendpd
> - { TTI::SK_Select, MVT::v4f64, 1 }, // vblendpd
> - { TTI::SK_Select, MVT::v8i32, 1 }, // vblendps
> - { TTI::SK_Select, MVT::v8f32, 1 }, // vblendps
> - { TTI::SK_Select, MVT::v16i16, 3 }, // vpand + vpandn + vpor
> - { TTI::SK_Select, MVT::v32i8, 3 }, // vpand + vpandn + vpor
> -
> - { TTI::SK_PermuteSingleSrc, MVT::v4f64, 2 }, // vperm2f128 + vshufpd
> - { TTI::SK_PermuteSingleSrc, MVT::v4i64, 2 }, // vperm2f128 + vshufpd
> - { TTI::SK_PermuteSingleSrc, MVT::v8f32, 4 }, // 2*vperm2f128 +
> 2*vshufps
> - { TTI::SK_PermuteSingleSrc, MVT::v8i32, 4 }, // 2*vperm2f128 +
> 2*vshufps
> - { TTI::SK_PermuteSingleSrc, MVT::v16i16, 8 }, // vextractf128 +
> 4*pshufb
> + {TTI::SK_Broadcast, MVT::v4f64, 2}, // vperm2f128 + vpermilpd
> + {TTI::SK_Broadcast, MVT::v8f32, 2}, // vperm2f128 + vpermilps
> + {TTI::SK_Broadcast, MVT::v4i64, 2}, // vperm2f128 + vpermilpd
> + {TTI::SK_Broadcast, MVT::v8i32, 2}, // vperm2f128 + vpermilps
> + {TTI::SK_Broadcast, MVT::v16i16, 3}, // vpshuflw + vpshufd +
> vinsertf128
> + {TTI::SK_Broadcast, MVT::v32i8, 2}, // vpshufb + vinsertf128
> +
> + {TTI::SK_Reverse, MVT::v4f64, 2}, // vperm2f128 + vpermilpd
> + {TTI::SK_Reverse, MVT::v8f32, 2}, // vperm2f128 + vpermilps
> + {TTI::SK_Reverse, MVT::v4i64, 2}, // vperm2f128 + vpermilpd
> + {TTI::SK_Reverse, MVT::v8i32, 2}, // vperm2f128 + vpermilps
> + {TTI::SK_Reverse, MVT::v16i16, 4}, // vextractf128 + 2*pshufb
> + // + vinsertf128
> + {TTI::SK_Reverse, MVT::v32i8, 4}, // vextractf128 + 2*pshufb
> + // + vinsertf128
> +
> + {TTI::SK_Select, MVT::v4i64, 1}, // vblendpd
> + {TTI::SK_Select, MVT::v4f64, 1}, // vblendpd
> + {TTI::SK_Select, MVT::v8i32, 1}, // vblendps
> + {TTI::SK_Select, MVT::v8f32, 1}, // vblendps
> + {TTI::SK_Select, MVT::v16i16, 3}, // vpand + vpandn + vpor
> + {TTI::SK_Select, MVT::v32i8, 3}, // vpand + vpandn + vpor
> +
> + {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vshufpd
> + {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, // vperm2f128 + vshufpd
> + {TTI::SK_PermuteSingleSrc, MVT::v8f32, 4}, // 2*vperm2f128 +
> 2*vshufps
> + {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4}, // 2*vperm2f128 +
> 2*vshufps
> + {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, // vextractf128 +
> 4*pshufb
> // + 2*por + vinsertf128
> - { TTI::SK_PermuteSingleSrc, MVT::v32i8, 8 }, // vextractf128 +
> 4*pshufb
> + {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8}, // vextractf128 +
> 4*pshufb
> // + 2*por + vinsertf128
>
> - { TTI::SK_PermuteTwoSrc, MVT::v4f64, 3 }, // 2*vperm2f128 +
> vshufpd
> - { TTI::SK_PermuteTwoSrc, MVT::v4i64, 3 }, // 2*vperm2f128 +
> vshufpd
> - { TTI::SK_PermuteTwoSrc, MVT::v8f32, 4 }, // 2*vperm2f128 +
> 2*vshufps
> - { TTI::SK_PermuteTwoSrc, MVT::v8i32, 4 }, // 2*vperm2f128 +
> 2*vshufps
> - { TTI::SK_PermuteTwoSrc, MVT::v16i16, 15 }, // 2*vextractf128 +
> 8*pshufb
> - // + 4*por +
> vinsertf128
> - { TTI::SK_PermuteTwoSrc, MVT::v32i8, 15 }, // 2*vextractf128 +
> 8*pshufb
> - // + 4*por +
> vinsertf128
> + {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, // 2*vperm2f128 + vshufpd
> + {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, // 2*vperm2f128 + vshufpd
> + {TTI::SK_PermuteTwoSrc, MVT::v8f32, 4}, // 2*vperm2f128 +
> 2*vshufps
> + {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4}, // 2*vperm2f128 +
> 2*vshufps
> + {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, // 2*vextractf128 +
> 8*pshufb
> + // + 4*por + vinsertf128
> + {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15}, // 2*vextractf128 +
> 8*pshufb
> + // + 4*por + vinsertf128
> };
>
> if (ST->hasAVX())
> @@ -1106,12 +1112,12 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
> return LT.first * Entry->Cost;
>
> static const CostTblEntry SSE41ShuffleTbl[] = {
> - { TTI::SK_Select, MVT::v2i64, 1 }, // pblendw
> - { TTI::SK_Select, MVT::v2f64, 1 }, // movsd
> - { TTI::SK_Select, MVT::v4i32, 1 }, // pblendw
> - { TTI::SK_Select, MVT::v4f32, 1 }, // blendps
> - { TTI::SK_Select, MVT::v8i16, 1 }, // pblendw
> - { TTI::SK_Select, MVT::v16i8, 1 } // pblendvb
> + {TTI::SK_Select, MVT::v2i64, 1}, // pblendw
> + {TTI::SK_Select, MVT::v2f64, 1}, // movsd
> + {TTI::SK_Select, MVT::v4i32, 1}, // pblendw
> + {TTI::SK_Select, MVT::v4f32, 1}, // blendps
> + {TTI::SK_Select, MVT::v8i16, 1}, // pblendw
> + {TTI::SK_Select, MVT::v16i8, 1} // pblendvb
> };
>
> if (ST->hasSSE41())
> @@ -1119,20 +1125,20 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
> return LT.first * Entry->Cost;
>
> static const CostTblEntry SSSE3ShuffleTbl[] = {
> - { TTI::SK_Broadcast, MVT::v8i16, 1 }, // pshufb
> - { TTI::SK_Broadcast, MVT::v16i8, 1 }, // pshufb
> + {TTI::SK_Broadcast, MVT::v8i16, 1}, // pshufb
> + {TTI::SK_Broadcast, MVT::v16i8, 1}, // pshufb
>
> - { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
> - { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
> + {TTI::SK_Reverse, MVT::v8i16, 1}, // pshufb
> + {TTI::SK_Reverse, MVT::v16i8, 1}, // pshufb
>
> - { TTI::SK_Select, MVT::v8i16, 3 }, // 2*pshufb + por
> - { TTI::SK_Select, MVT::v16i8, 3 }, // 2*pshufb + por
> + {TTI::SK_Select, MVT::v8i16, 3}, // 2*pshufb + por
> + {TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por
>
> - { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb
> - { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
> + {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb
> + {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
>
> - { TTI::SK_PermuteTwoSrc, MVT::v8i16, 3 }, // 2*pshufb + por
> - { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 }, // 2*pshufb + por
> + {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, // 2*pshufb + por
> + {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, // 2*pshufb + por
> };
>
> if (ST->hasSSSE3())
> @@ -1140,29 +1146,29 @@ int X86TTIImpl::getShuffleCost(TTI::Shuf
> return LT.first * Entry->Cost;
>
> static const CostTblEntry SSE2ShuffleTbl[] = {
> - { TTI::SK_Broadcast, MVT::v2f64, 1 }, // shufpd
> - { TTI::SK_Broadcast, MVT::v2i64, 1 }, // pshufd
> - { TTI::SK_Broadcast, MVT::v4i32, 1 }, // pshufd
> - { TTI::SK_Broadcast, MVT::v8i16, 2 }, // pshuflw + pshufd
> - { TTI::SK_Broadcast, MVT::v16i8, 3 }, // unpck + pshuflw + pshufd
> -
> - { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
> - { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
> - { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
> - { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
> - { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
> - // + 2*pshufd + 2*unpck +
> packus
> -
> - { TTI::SK_Select, MVT::v2i64, 1 }, // movsd
> - { TTI::SK_Select, MVT::v2f64, 1 }, // movsd
> - { TTI::SK_Select, MVT::v4i32, 2 }, // 2*shufps
> - { TTI::SK_Select, MVT::v8i16, 3 }, // pand + pandn + por
> - { TTI::SK_Select, MVT::v16i8, 3 }, // pand + pandn + por
> -
> - { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // shufpd
> - { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd
> - { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // pshufd
> - { TTI::SK_PermuteSingleSrc, MVT::v8i16, 5 }, // 2*pshuflw + 2*pshufhw
> + {TTI::SK_Broadcast, MVT::v2f64, 1}, // shufpd
> + {TTI::SK_Broadcast, MVT::v2i64, 1}, // pshufd
> + {TTI::SK_Broadcast, MVT::v4i32, 1}, // pshufd
> + {TTI::SK_Broadcast, MVT::v8i16, 2}, // pshuflw + pshufd
> + {TTI::SK_Broadcast, MVT::v16i8, 3}, // unpck + pshuflw + pshufd
> +
> + {TTI::SK_Reverse, MVT::v2f64, 1}, // shufpd
> + {TTI::SK_Reverse, MVT::v2i64, 1}, // pshufd
> + {TTI::SK_Reverse, MVT::v4i32, 1}, // pshufd
> + {TTI::SK_Reverse, MVT::v8i16, 3}, // pshuflw + pshufhw + pshufd
> + {TTI::SK_Reverse, MVT::v16i8, 9}, // 2*pshuflw + 2*pshufhw
> + // + 2*pshufd + 2*unpck + packus
> +
> + {TTI::SK_Select, MVT::v2i64, 1}, // movsd
> + {TTI::SK_Select, MVT::v2f64, 1}, // movsd
> + {TTI::SK_Select, MVT::v4i32, 2}, // 2*shufps
> + {TTI::SK_Select, MVT::v8i16, 3}, // pand + pandn + por
> + {TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por
> +
> + {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd
> + {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // pshufd
> + {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd
> + {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, // 2*pshuflw + 2*pshufhw
> // + pshufd/unpck
> { TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw
> // + 2*pshufd + 2*unpck
> + 2*packus
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll Fri Nov 9
> 11:04:27 2018
> @@ -12,17 +12,17 @@ define i32 @reduce_i64(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i64'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i64'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i64'
> @@ -46,7 +46,7 @@ define i32 @reduce_i64(i32 %arg) {
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
> -; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x
> i64> undef)
> @@ -61,17 +61,17 @@ define i32 @reduce_i32(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i32'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i32'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i32'
> @@ -95,7 +95,7 @@ define i32 @reduce_i32(i32 %arg) {
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
> -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x
> i32> undef)
> @@ -110,17 +110,17 @@ define i32 @reduce_i16(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i16'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i16'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i16'
> @@ -135,24 +135,24 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i16'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i16'
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i16'
> @@ -160,15 +160,15 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i16'
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x
> i16> undef)
> @@ -183,49 +183,49 @@ define i32 @reduce_i8(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i8'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i8'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i8'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i8'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i8'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i8'
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8
> x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i8'
> @@ -233,15 +233,15 @@ define i32 @reduce_i8(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 118 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 121 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i8'
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8
> x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>
> undef)
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll Fri Nov 9
> 11:04:27 2018
> @@ -12,25 +12,25 @@ define i32 @reduce_i64(i32 %arg) {
> ; SSE-LABEL: 'reduce_i64'
> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i64'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i64'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512-LABEL: 'reduce_i64'
> @@ -38,7 +38,7 @@ define i32 @reduce_i64(i32 %arg) {
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef)
> -; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x
> i64> undef)
> @@ -53,25 +53,25 @@ define i32 @reduce_i32(i32 %arg) {
> ; SSE-LABEL: 'reduce_i32'
> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i32'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i32'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512-LABEL: 'reduce_i32'
> @@ -79,7 +79,7 @@ define i32 @reduce_i32(i32 %arg) {
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i32
> @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef)
> -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x
> i32> undef)
> @@ -94,49 +94,49 @@ define i32 @reduce_i16(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i16'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i16'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i16'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i16'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i16'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i16'
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i16'
> @@ -144,15 +144,15 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i16'
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x
> i16> undef)
> @@ -167,49 +167,49 @@ define i32 @reduce_i8(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i8'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i8'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i8'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i8'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 85 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i8'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i8'
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8
> x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i8'
> @@ -217,15 +217,15 @@ define i32 @reduce_i8(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 118 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 121 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i8'
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8
> x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8>
> undef)
> @@ -243,9 +243,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i1'
> @@ -254,9 +254,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i1'
> @@ -265,9 +265,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i1'
> @@ -277,8 +277,8 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 85 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i1'
> @@ -288,8 +288,8 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i1'
> @@ -298,9 +298,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4
> x i1> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8
> x i1> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 151 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 156 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 165 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 150 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 154 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 162 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i1'
> @@ -311,7 +311,7 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 357 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 838 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 841 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 840 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i1'
> @@ -320,9 +320,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4
> x i1> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8
> x i1> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 151 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 156 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 165 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 150 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 154 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 162 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1>
> undef)
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll Fri Nov 9
> 11:04:27 2018
> @@ -12,25 +12,25 @@ define i32 @reduce_i64(i32 %arg) {
> ; SSE-LABEL: 'reduce_i64'
> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 126 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 59 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 123 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i64'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 154 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 152 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i64'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i64'
> @@ -38,7 +38,7 @@ define i32 @reduce_i64(i32 %arg) {
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for
> instruction: %V2 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i64'
> @@ -46,7 +46,7 @@ define i32 @reduce_i64(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for
> instruction: %V2 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 35 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 51 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i64'
> @@ -54,7 +54,7 @@ define i32 @reduce_i64(i32 %arg) {
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for
> instruction: %V2 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x
> i64> undef)
> @@ -69,41 +69,41 @@ define i32 @reduce_i32(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i32'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 105 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 102 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i32'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 105 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 102 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i32'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i32'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i32'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i32'
> @@ -111,7 +111,7 @@ define i32 @reduce_i32(i32 %arg) {
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i32'
> @@ -119,7 +119,7 @@ define i32 @reduce_i32(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i32'
> @@ -127,7 +127,7 @@ define i32 @reduce_i32(i32 %arg) {
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x
> i32> undef)
> @@ -142,49 +142,49 @@ define i32 @reduce_i16(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i16'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i16'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i16'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i16'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i16'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i16'
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i16'
> @@ -192,15 +192,15 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i16'
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x
> i16> undef)
> @@ -215,49 +215,49 @@ define i32 @reduce_i8(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i8'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 129 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 178 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 275 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 176 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 272 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i8'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 93 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 142 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 239 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 236 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i8'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 93 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 142 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 239 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 92 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 140 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 236 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i8'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 202 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 255 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 360 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 254 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 358 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i8'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 137 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 172 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 241 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 171 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 239 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i8'
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8
> x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 117 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 144 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 197 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 143 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 195 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i8'
> @@ -265,15 +265,15 @@ define i32 @reduce_i8(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 67 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 178 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 201 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 200 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i8'
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8
> x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 40 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 117 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 144 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 197 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 143 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 195 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8>
> undef)
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll Fri Nov 9
> 11:04:27 2018
> @@ -12,25 +12,25 @@ define i32 @reduce_i64(i32 %arg) {
> ; SSE-LABEL: 'reduce_i64'
> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i64'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i64'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512-LABEL: 'reduce_i64'
> @@ -38,7 +38,7 @@ define i32 @reduce_i64(i32 %arg) {
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef)
> -; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64>
> undef)
> @@ -53,25 +53,25 @@ define i32 @reduce_i32(i32 %arg) {
> ; SSE-LABEL: 'reduce_i32'
> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i32'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i32'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512-LABEL: 'reduce_i32'
> @@ -79,7 +79,7 @@ define i32 @reduce_i32(i32 %arg) {
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i32
> @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef)
> -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>
> undef)
> @@ -94,49 +94,49 @@ define i32 @reduce_i16(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i16'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i16'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i16'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i16'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i16'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i16'
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i16'
> @@ -144,15 +144,15 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i16'
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>
> undef)
> @@ -167,49 +167,49 @@ define i32 @reduce_i8(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i8'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i8'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i8'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i8'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 85 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i8'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i8'
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x
> i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i8'
> @@ -217,15 +217,15 @@ define i32 @reduce_i8(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 118 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 121 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i8'
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x
> i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8>
> undef)
> @@ -243,9 +243,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i1'
> @@ -254,9 +254,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i1'
> @@ -265,9 +265,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i1'
> @@ -277,8 +277,8 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 85 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i1'
> @@ -288,8 +288,8 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i1'
> @@ -298,9 +298,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x
> i1> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x
> i1> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 151 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 156 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 165 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 150 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 154 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 162 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i1'
> @@ -311,7 +311,7 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 357 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 838 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 841 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 840 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i1'
> @@ -320,9 +320,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x
> i1> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x
> i1> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 151 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 156 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 165 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 150 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 154 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 162 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1>
> undef)
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll Fri Nov 9
> 11:04:27 2018
> @@ -12,17 +12,17 @@ define i32 @reduce_i64(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i64'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 146 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 143 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i64'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 146 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 143 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i64'
> @@ -37,8 +37,8 @@ define i32 @reduce_i64(i32 %arg) {
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i64'
> @@ -69,17 +69,17 @@ define i32 @reduce_i32(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i32'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i32'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i32'
> @@ -126,17 +126,17 @@ define i32 @reduce_i16(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i16'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i16'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i16'
> @@ -151,8 +151,8 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 133 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 111 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 131 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i16'
> @@ -176,7 +176,7 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 112 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 117 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 116 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i16'
> @@ -199,33 +199,33 @@ define i32 @reduce_i8(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i8'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 97 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 102 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 111 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i8'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i8'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i8'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 162 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 173 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i8'
> @@ -249,7 +249,7 @@ define i32 @reduce_i8(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 253 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 258 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 257 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i8'
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll Fri Nov 9
> 11:04:27 2018
> @@ -12,17 +12,17 @@ define i32 @reduce_i64(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i64'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 146 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 143 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i64'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 146 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 143 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i64'
> @@ -37,8 +37,8 @@ define i32 @reduce_i64(i32 %arg) {
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i64'
> @@ -69,17 +69,17 @@ define i32 @reduce_i32(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i32'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i32'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i32'
> @@ -126,17 +126,17 @@ define i32 @reduce_i16(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i16'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i16'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i16'
> @@ -151,8 +151,8 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 133 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 111 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 131 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i16'
> @@ -176,7 +176,7 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 112 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 117 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 116 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i16'
> @@ -199,33 +199,33 @@ define i32 @reduce_i8(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i8'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 97 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 102 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 111 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i8'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i8'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i8'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 162 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 173 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i8'
> @@ -249,7 +249,7 @@ define i32 @reduce_i8(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 253 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 258 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 257 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i8'
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll Fri Nov 9
> 11:04:27 2018
> @@ -12,17 +12,17 @@ define i32 @reduce_i64(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i64'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 146 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 143 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i64'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 146 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 143 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i64'
> @@ -37,8 +37,8 @@ define i32 @reduce_i64(i32 %arg) {
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i64'
> @@ -69,17 +69,17 @@ define i32 @reduce_i32(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i32'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i32'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i32'
> @@ -126,17 +126,17 @@ define i32 @reduce_i16(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i16'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i16'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i16'
> @@ -151,8 +151,8 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 133 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 111 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 131 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i16'
> @@ -176,7 +176,7 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 112 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 117 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 116 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i16'
> @@ -199,33 +199,33 @@ define i32 @reduce_i8(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i8'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 97 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 102 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 111 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i8'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i8'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i8'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 162 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 173 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i8'
> @@ -249,7 +249,7 @@ define i32 @reduce_i8(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 253 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 258 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 257 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i8'
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll Fri Nov 9
> 11:04:27 2018
> @@ -12,17 +12,17 @@ define i32 @reduce_i64(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i64'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 146 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 143 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i64'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 146 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 143 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i64'
> @@ -37,8 +37,8 @@ define i32 @reduce_i64(i32 %arg) {
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i64'
> @@ -69,17 +69,17 @@ define i32 @reduce_i32(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i32'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i32'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i32'
> @@ -126,17 +126,17 @@ define i32 @reduce_i16(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i16'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i16'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i16'
> @@ -151,8 +151,8 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 133 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 111 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 131 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i16'
> @@ -176,7 +176,7 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 112 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 117 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 116 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i16'
> @@ -199,33 +199,33 @@ define i32 @reduce_i8(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i8'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 97 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 102 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 111 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i8'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i8'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 92 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 73 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i8'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 162 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 173 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i8'
> @@ -249,7 +249,7 @@ define i32 @reduce_i8(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 253 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 258 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 257 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i8'
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll Fri Nov 9
> 11:04:27 2018
> @@ -12,25 +12,25 @@ define i32 @reduce_i64(i32 %arg) {
> ; SSE-LABEL: 'reduce_i64'
> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i64'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i64'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512-LABEL: 'reduce_i64'
> @@ -38,7 +38,7 @@ define i32 @reduce_i64(i32 %arg) {
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i64
> @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef)
> -; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for
> instruction: %V16 = call i64
> @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x
> i64> undef)
> @@ -53,25 +53,25 @@ define i32 @reduce_i32(i32 %arg) {
> ; SSE-LABEL: 'reduce_i32'
> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>
> undef)
> -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>
> undef)
> +; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>
> undef)
> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i32'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i32'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512-LABEL: 'reduce_i32'
> @@ -79,7 +79,7 @@ define i32 @reduce_i32(i32 %arg) {
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>
> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i32
> @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i32
> @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef)
> -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for
> instruction: %V32 = call i32
> @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x
> i32> undef)
> @@ -94,49 +94,49 @@ define i32 @reduce_i16(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i16'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i16'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i16'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i16'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i16'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction:
> %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction:
> %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i16'
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i16'
> @@ -144,15 +144,15 @@ define i32 @reduce_i16(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i16'
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for
> instruction: %V4 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for
> instruction: %V16 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 39 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 44 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for
> instruction: %V32 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for
> instruction: %V64 = call i16
> @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x
> i16> undef)
> @@ -167,49 +167,49 @@ define i32 @reduce_i8(i32 %arg) {
> ; SSE2-LABEL: 'reduce_i8'
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i8'
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i8'
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i8'
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 85 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i8'
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i8'
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8
> x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i8'
> @@ -217,15 +217,15 @@ define i32 @reduce_i8(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 118 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 121 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i8'
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for
> instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8
> x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for
> instruction: %V16 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for
> instruction: %V32 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 60 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 65 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V64 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for
> instruction: %V128 = call i8
> @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8>
> undef)
> @@ -243,9 +243,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSSE3-LABEL: 'reduce_i1'
> @@ -254,9 +254,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; SSE42-LABEL: 'reduce_i1'
> @@ -265,9 +265,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX1-LABEL: 'reduce_i1'
> @@ -277,8 +277,8 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> -; AVX1-NEXT: Cost Model: Found an estimated cost of 85 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX2-LABEL: 'reduce_i1'
> @@ -288,8 +288,8 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction:
> %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction:
> %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> -; AVX2-NEXT: Cost Model: Found an estimated cost of 65 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction:
> %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>
> undef)
> +; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction:
> %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>
> undef)
> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> ;
> ; AVX512F-LABEL: 'reduce_i1'
> @@ -298,9 +298,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4
> x i1> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8
> x i1> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 151 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 156 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 165 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 150 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 154 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 162 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512BW-LABEL: 'reduce_i1'
> @@ -311,7 +311,7 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 357 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 838 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 841 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 840 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> ; AVX512DQ-LABEL: 'reduce_i1'
> @@ -320,9 +320,9 @@ define i32 @reduce_i1(i32 %arg) {
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for
> instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4
> x i1> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for
> instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8
> x i1> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 148 for
> instruction: %V16 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 151 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 156 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
> -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 165 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 150 for
> instruction: %V32 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 154 for
> instruction: %V64 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
> +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 162 for
> instruction: %V128 = call i1
> @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for
> instruction: ret i32 undef
> ;
> %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1>
> undef)
>
> Modified: llvm/trunk/test/Analysis/CostModel/X86/reduction.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/reduction.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/reduction.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/reduction.ll Fri Nov 9
> 11:04:27 2018
> @@ -59,7 +59,7 @@ define fastcc i32 @reduction_cost_int(<8
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %r = extractelement <8 x i32> %bin.rdx.3, i32 0
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %r = extractelement <8 x i32> %bin.rdx.3, i32 0
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 %r
> ;
> ; SSSE3-LABEL: 'reduction_cost_int'
> @@ -69,7 +69,7 @@ define fastcc i32 @reduction_cost_int(<8
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %r = extractelement <8 x i32> %bin.rdx.3, i32 0
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %r = extractelement <8 x i32> %bin.rdx.3, i32 0
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 %r
> ;
> ; SSE42-LABEL: 'reduction_cost_int'
> @@ -376,7 +376,7 @@ define fastcc double @no_pairwise_reduct
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret double %r
> ;
> ; SSSE3-LABEL: 'no_pairwise_reduction4double'
> @@ -384,7 +384,7 @@ define fastcc double @no_pairwise_reduct
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret double %r
> ;
> ; SSE42-LABEL: 'no_pairwise_reduction4double'
> @@ -428,7 +428,7 @@ define fastcc float @no_pairwise_reducti
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %r = extractelement <8 x float> %bin.rdx8, i32 0
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction:
> %r = extractelement <8 x float> %bin.rdx8, i32 0
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret float %r
> ;
> ; SSSE3-LABEL: 'no_pairwise_reduction8float'
> @@ -438,7 +438,7 @@ define fastcc float @no_pairwise_reducti
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %r = extractelement <8 x float> %bin.rdx8, i32 0
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction:
> %r = extractelement <8 x float> %bin.rdx8, i32 0
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret float %r
> ;
> ; SSE42-LABEL: 'no_pairwise_reduction8float'
> @@ -562,7 +562,7 @@ define fastcc i64 @no_pairwise_reduction
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx = add <4 x i64> %rdx, %rdx.shuf
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32>
> <i32 1, i32 undef, i32 undef, i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i64 %r
> ;
> ; SSSE3-LABEL: 'no_pairwise_reduction4i64'
> @@ -570,7 +570,7 @@ define fastcc i64 @no_pairwise_reduction
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx = add <4 x i64> %rdx, %rdx.shuf
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32>
> <i32 1, i32 undef, i32 undef, i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i64 %r
> ;
> ; SSE42-LABEL: 'no_pairwise_reduction4i64'
> @@ -666,7 +666,7 @@ define fastcc i32 @no_pairwise_reduction
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32>
> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %r = extractelement <8 x i32> %bin.rdx8, i32 0
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %r = extractelement <8 x i32> %bin.rdx8, i32 0
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 %r
> ;
> ; SSSE3-LABEL: 'no_pairwise_reduction8i32'
> @@ -676,7 +676,7 @@ define fastcc i32 @no_pairwise_reduction
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32>
> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
> i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %r = extractelement <8 x i32> %bin.rdx8, i32 0
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction:
> %r = extractelement <8 x i32> %bin.rdx8, i32 0
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 %r
> ;
> ; SSE42-LABEL: 'no_pairwise_reduction8i32'
> @@ -817,7 +817,7 @@ define fastcc double @pairwise_reduction
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4
> x i32> <i32 0, i32 undef, i32 undef, i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4
> x i32> <i32 1, i32 undef, i32 undef, i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret double %r
> ;
> ; SSSE3-LABEL: 'pairwise_reduction4double'
> @@ -827,7 +827,7 @@ define fastcc double @pairwise_reduction
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4
> x i32> <i32 0, i32 undef, i32 undef, i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4
> x i32> <i32 1, i32 undef, i32 undef, i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction:
> %r = extractelement <4 x double> %bin.rdx8, i32 0
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret double %r
> ;
> ; SSE42-LABEL: 'pairwise_reduction4double'
> @@ -882,7 +882,7 @@ define fastcc float @pairwise_reduction8
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8
> x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8
> x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %r = extractelement <8 x float> %bin.rdx9, i32 0
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction:
> %r = extractelement <8 x float> %bin.rdx9, i32 0
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret float %r
> ;
> ; SSSE3-LABEL: 'pairwise_reduction8float'
> @@ -895,7 +895,7 @@ define fastcc float @pairwise_reduction8
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8
> x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8
> x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction:
> %r = extractelement <8 x float> %bin.rdx9, i32 0
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction:
> %r = extractelement <8 x float> %bin.rdx9, i32 0
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret float %r
> ;
> ; SSE42-LABEL: 'pairwise_reduction8float'
> @@ -1048,7 +1048,7 @@ define fastcc i64 @pairwise_reduction4i6
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x
> i32> <i32 0, i32 undef, i32 undef, i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i64 %r
> ;
> ; SSSE3-LABEL: 'pairwise_reduction4i64'
> @@ -1058,7 +1058,7 @@ define fastcc i64 @pairwise_reduction4i6
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x
> i32> <i32 0, i32 undef, i32 undef, i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction:
> %r = extractelement <4 x i64> %bin.rdx8, i32 0
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i64 %r
> ;
> ; SSE42-LABEL: 'pairwise_reduction4i64'
> @@ -1180,7 +1180,7 @@ define fastcc i32 @pairwise_reduction8i3
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x
> i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1
> -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %r = extractelement <8 x i32> %bin.rdx9, i32 0
> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction:
> %r = extractelement <8 x i32> %bin.rdx9, i32 0
> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 %r
> ;
> ; SSSE3-LABEL: 'pairwise_reduction8i32'
> @@ -1193,7 +1193,7 @@ define fastcc i32 @pairwise_reduction8i3
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x
> i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction:
> %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x
> i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32
> undef, i32 undef>
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
> %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1
> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction:
> %r = extractelement <8 x i32> %bin.rdx9, i32 0
> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction:
> %r = extractelement <8 x i32> %bin.rdx9, i32 0
> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 %r
> ;
> ; SSE42-LABEL: 'pairwise_reduction8i32'
>
> Modified:
> llvm/trunk/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
> (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll
> Fri Nov 9 11:04:27 2018
> @@ -17,28 +17,52 @@
> ;
>
> define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) {
> -; CHECK-LABEL: 'test_vXf64'
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 2, i32 3>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 0, i32 1>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 2, i32 3>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 4, i32 5>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 6, i32 7>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 2, i32 3, i32 4, i32 5>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> +; SSE-LABEL: 'test_vXf64'
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 2, i32 3>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 2, i32 3>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 4, i32 5>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 6, i32 7>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 2, i32 3, i32 4, i32 5>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> +;
> +; AVX-LABEL: 'test_vXf64'
> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 2, i32 3>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 2, i32 3>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 4, i32 5>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 6, i32 7>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 2, i32 3, i32 4, i32 5>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> +;
> +; AVX512-LABEL: 'test_vXf64'
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 2, i32 3>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 2, i32 3>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 4, i32 5>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 6, i32 7>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 2, i32 3, i32 4, i32 5>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> ;
> ; BTVER2-LABEL: 'test_vXf64'
> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 2, i32 3>
> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 0, i32 1>
> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 0, i32 1>
> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 2, i32 3>
> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 4, i32 5>
> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 4, i32 5>
> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x
> i32> <i32 6, i32 7>
> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 0, i32 1, i32 2, i32 3>
> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 2, i32 3, i32 4, i32 5>
> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x
> i32> <i32 4, i32 5, i32 6, i32 7>
> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> ;
> %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x
> i32> <i32 0, i32 1>
> @@ -54,28 +78,52 @@ define void @test_vXf64(<4 x double> %sr
> }
>
> define void @test_vXfi64(<4 x i64> %src256, <8 x i64> %src512) {
> -; CHECK-LABEL: 'test_vXfi64'
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 4, i32 5>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 6, i32 7>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 2, i32 3, i32 4, i32 5>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> +; SSE-LABEL: 'test_vXfi64'
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 4, i32 5>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 6, i32 7>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 2, i32 3, i32 4, i32 5>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> +;
> +; AVX-LABEL: 'test_vXfi64'
> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 4, i32 5>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 6, i32 7>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 2, i32 3, i32 4, i32 5>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> +;
> +; AVX512-LABEL: 'test_vXfi64'
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 4, i32 5>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 6, i32 7>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 2, i32 3, i32 4, i32 5>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> ;
> ; BTVER2-LABEL: 'test_vXfi64'
> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 0, i32 1>
> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 2, i32 3>
> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 4, i32 5>
> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 4, i32 5>
> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32
> 6, i32 7>
> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 0, i32 1, i32 2, i32 3>
> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 2, i32 3, i32 4, i32 5>
> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
> %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32>
> <i32 4, i32 5, i32 6, i32 7>
> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
> ret void
> ;
> %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32>
> <i32 0, i32 1>
>
> Modified:
> llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll?rev=346538&r1=346537&r2=346538&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
> (original)
> +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll Fri
> Nov 9 11:04:27 2018
> @@ -13,7 +13,7 @@
> ; Vector cost is 5, Scalar cost is 7
> ; CHECK: Adding cost -2 for reduction that starts with %7 = load i32,
> i32* %arrayidx.7, align 4 (It is a splitting reduction)
> ; Vector cost is 11, Scalar cost is 7
> -; SSE2: Adding cost 4 for reduction that starts with %7 = load i32,
> i32* %arrayidx.7, align 4 (It is a splitting reduction)
> +; SSE2: Adding cost 3 for reduction that starts with %7 = load i32,
> i32* %arrayidx.7, align 4 (It is a splitting reduction)
> define i32 @test_add(i32* nocapture readonly %p) {
> ; CHECK-LABEL: @test_add(
> ; CHECK-NEXT: entry:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181109/1c50a087/attachment-0001.html>
More information about the llvm-commits mailing list