[llvm] b3480d5 - [SLP] Compute min/max scalar reduction costs using min/max intrinsics instead of expanded cmp+sel
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 13 09:00:50 PDT 2023
Author: Simon Pilgrim
Date: 2023-04-13T17:00:39+01:00
New Revision: b3480d5ede83720abf58dcadf4e79015e5a7e60a
URL: https://github.com/llvm/llvm-project/commit/b3480d5ede83720abf58dcadf4e79015e5a7e60a
DIFF: https://github.com/llvm/llvm-project/commit/b3480d5ede83720abf58dcadf4e79015e5a7e60a.diff
LOG: [SLP] Compute min/max scalar reduction costs using min/max intrinsics instead of expanded cmp+sel
By default these will expand back to cmp/sel, but some targets (X86) has optimized costs for scalar integer min/max patterns which are lower than the default expansion (pre-SSE41 is particularly weak for vector min/max support).
Differential Revision: [SLP] Compute min/max scalar reduction costs using min/max intrinsics instead of expanded cmp+sel
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8d26cb2d024ec..7f79095153e90 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13621,29 +13621,11 @@ class HorizontalReduction {
break;
}
case RecurKind::FMax:
- case RecurKind::FMin: {
- auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy);
- if (!AllConsts) {
- auto *VecCondTy =
- cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
- VectorCost =
- TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
- /*IsUnsigned=*/false, FMF, CostKind);
- }
- CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind);
- ScalarCost = EvaluateScalarCost([&]() {
- return TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy, SclCondTy,
- RdxPred, CostKind) +
- TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy, SclCondTy,
- RdxPred, CostKind);
- });
- break;
- }
+ case RecurKind::FMin:
case RecurKind::SMax:
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin: {
- auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy);
if (!AllConsts) {
auto *VecCondTy =
cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
@@ -13652,12 +13634,10 @@ class HorizontalReduction {
VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
IsUnsigned, FMF, CostKind);
}
- CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind);
+ Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);
ScalarCost = EvaluateScalarCost([&]() {
- return TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy, SclCondTy,
- RdxPred, CostKind) +
- TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy, SclCondTy,
- RdxPred, CostKind);
+ IntrinsicCostAttributes ICA(Id, ScalarTy, {ScalarTy, ScalarTy}, FMF);
+ return TTI->getIntrinsicInstrCost(ICA, CostKind);
});
break;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
index e72b6cc1a2402..66e3fbf7845a3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE4
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -passes=slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,THRESH
@arr = local_unnamed_addr global [32 x i32] zeroinitializer, align 16
@@ -765,24 +765,70 @@ define float @maxf32(float) {
}
define i32 @maxi8_mutiple_uses(i32) {
-; DEFAULT-LABEL: @maxi8_mutiple_uses(
-; DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr @arr, align 16
-; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
-; DEFAULT-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
-; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; DEFAULT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
-; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
-; DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
-; DEFAULT-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
-; DEFAULT-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
-; DEFAULT-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
-; DEFAULT-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
-; DEFAULT-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
-; DEFAULT-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
-; DEFAULT-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
-; DEFAULT-NEXT: [[TMP10:%.*]] = select i1 [[TMP4]], i32 3, i32 4
-; DEFAULT-NEXT: store i32 [[TMP10]], ptr @var, align 8
-; DEFAULT-NEXT: ret i32 [[OP_RDX5]]
+; SSE2-LABEL: @maxi8_mutiple_uses(
+; SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
+; SSE2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
+; SSE2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
+; SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; SSE2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
+; SSE2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
+; SSE2-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
+; SSE2-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
+; SSE2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
+; SSE2-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
+; SSE2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
+; SSE2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
+; SSE2-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
+; SSE2-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
+; SSE2-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
+; SSE2-NEXT: [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; SSE2-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
+; SSE2-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
+; SSE2-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
+; SSE2-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
+; SSE2-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
+; SSE2-NEXT: [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4
+; SSE2-NEXT: store i32 [[TMP24]], ptr @var, align 8
+; SSE2-NEXT: ret i32 [[TMP23]]
+;
+; SSE4-LABEL: @maxi8_mutiple_uses(
+; SSE4-NEXT: [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; SSE4-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
+; SSE4-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
+; SSE4-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
+; SSE4-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; SSE4-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; SSE4-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
+; SSE4-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
+; SSE4-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
+; SSE4-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
+; SSE4-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
+; SSE4-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
+; SSE4-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
+; SSE4-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
+; SSE4-NEXT: [[TMP10:%.*]] = select i1 [[TMP4]], i32 3, i32 4
+; SSE4-NEXT: store i32 [[TMP10]], ptr @var, align 8
+; SSE4-NEXT: ret i32 [[OP_RDX5]]
+;
+; AVX-LABEL: @maxi8_mutiple_uses(
+; AVX-NEXT: [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
+; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
+; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
+; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
+; AVX-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
+; AVX-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
+; AVX-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
+; AVX-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
+; AVX-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
+; AVX-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
+; AVX-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
+; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP4]], i32 3, i32 4
+; AVX-NEXT: store i32 [[TMP10]], ptr @var, align 8
+; AVX-NEXT: ret i32 [[OP_RDX5]]
;
; THRESH-LABEL: @maxi8_mutiple_uses(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
@@ -901,24 +947,70 @@ define i32 @maxi8_mutiple_uses2(i32) {
}
define i32 @maxi8_wrong_parent(i32) {
-; DEFAULT-LABEL: @maxi8_wrong_parent(
-; DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr @arr, align 16
-; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
-; DEFAULT-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
-; DEFAULT-NEXT: br label [[PP:%.*]]
-; DEFAULT: pp:
-; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; DEFAULT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
-; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
-; DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
-; DEFAULT-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
-; DEFAULT-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
-; DEFAULT-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
-; DEFAULT-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
-; DEFAULT-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
-; DEFAULT-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
-; DEFAULT-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
-; DEFAULT-NEXT: ret i32 [[OP_RDX5]]
+; SSE2-LABEL: @maxi8_wrong_parent(
+; SSE2-NEXT: [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; SSE2-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
+; SSE2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
+; SSE2-NEXT: br label [[PP:%.*]]
+; SSE2: pp:
+; SSE2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
+; SSE2-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; SSE2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
+; SSE2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
+; SSE2-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
+; SSE2-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
+; SSE2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
+; SSE2-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
+; SSE2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
+; SSE2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
+; SSE2-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
+; SSE2-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
+; SSE2-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
+; SSE2-NEXT: [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; SSE2-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
+; SSE2-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
+; SSE2-NEXT: [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
+; SSE2-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
+; SSE2-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
+; SSE2-NEXT: ret i32 [[TMP23]]
+;
+; SSE4-LABEL: @maxi8_wrong_parent(
+; SSE4-NEXT: [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; SSE4-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
+; SSE4-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
+; SSE4-NEXT: br label [[PP:%.*]]
+; SSE4: pp:
+; SSE4-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
+; SSE4-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; SSE4-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; SSE4-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
+; SSE4-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
+; SSE4-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
+; SSE4-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
+; SSE4-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
+; SSE4-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
+; SSE4-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
+; SSE4-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
+; SSE4-NEXT: ret i32 [[OP_RDX5]]
+;
+; AVX-LABEL: @maxi8_wrong_parent(
+; AVX-NEXT: [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
+; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
+; AVX-NEXT: br label [[PP:%.*]]
+; AVX: pp:
+; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
+; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
+; AVX-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
+; AVX-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
+; AVX-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
+; AVX-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
+; AVX-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
+; AVX-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
+; AVX-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
+; AVX-NEXT: ret i32 [[OP_RDX5]]
;
; THRESH-LABEL: @maxi8_wrong_parent(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
index 8c8c64473da31..a8e69d0a8c713 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
@@ -11,23 +11,41 @@ define void @hoge() {
; CHECK-NEXT: ret void
; CHECK: bb2:
; CHECK-NEXT: [[T:%.*]] = select i1 undef, i16 undef, i16 15
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> <i16 undef, i16 poison>, i16 [[T]], i32 1
-; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> <i32 63, i32 undef>, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], undef
-; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 1>
-; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[SHUFFLE2]], <i32 15, i32 undef, i32 31, i32 47>
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
-; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[TMP5]], i32 undef
+; CHECK-NEXT: [[T3:%.*]] = sext i16 undef to i32
+; CHECK-NEXT: [[T4:%.*]] = sext i16 [[T]] to i32
+; CHECK-NEXT: [[T5:%.*]] = sub nsw i32 undef, [[T4]]
+; CHECK-NEXT: [[T6:%.*]] = sub i32 [[T5]], undef
+; CHECK-NEXT: [[T7:%.*]] = sub nsw i32 63, [[T3]]
+; CHECK-NEXT: [[T8:%.*]] = sub i32 [[T7]], undef
+; CHECK-NEXT: [[T9:%.*]] = add i32 [[T8]], undef
+; CHECK-NEXT: [[T10:%.*]] = add nsw i32 [[T6]], 15
+; CHECK-NEXT: [[T11:%.*]] = icmp sgt i32 [[T9]], [[T10]]
+; CHECK-NEXT: [[T12:%.*]] = select i1 [[T11]], i32 [[T9]], i32 [[T10]]
+; CHECK-NEXT: [[T13:%.*]] = add nsw i32 [[T6]], 31
+; CHECK-NEXT: [[T14:%.*]] = icmp sgt i32 [[T12]], [[T13]]
+; CHECK-NEXT: [[T15:%.*]] = select i1 [[T14]], i32 [[T12]], i32 [[T13]]
+; CHECK-NEXT: [[T16:%.*]] = add nsw i32 [[T6]], 47
+; CHECK-NEXT: [[T17:%.*]] = icmp sgt i32 [[T15]], [[T16]]
+; CHECK-NEXT: [[T18:%.*]] = select i1 [[T17]], i32 [[T15]], i32 [[T16]]
+; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[T18]], i32 undef
; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63
-; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <2 x i32> undef, [[TMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP6]], undef
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 -17, i32 -33, i32 -33, i32 -49>
-; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]])
-; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 undef, [[TMP9]]
-; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 undef, i32 [[TMP9]]
-; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX1]]
+; CHECK-NEXT: [[T21:%.*]] = sub nsw i32 undef, [[T3]]
+; CHECK-NEXT: [[T22:%.*]] = sub i32 [[T21]], undef
+; CHECK-NEXT: [[T23:%.*]] = sub nsw i32 undef, [[T4]]
+; CHECK-NEXT: [[T24:%.*]] = sub i32 [[T23]], undef
+; CHECK-NEXT: [[T25:%.*]] = add nsw i32 [[T24]], -49
+; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T22]], -33
+; CHECK-NEXT: [[T35:%.*]] = add nsw i32 [[T24]], -33
+; CHECK-NEXT: [[T40:%.*]] = add nsw i32 [[T22]], -17
+; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 undef, [[T40]]
+; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 undef, i32 [[T40]]
+; CHECK-NEXT: [[OP_RDX2:%.*]] = icmp slt i32 [[T35]], [[T30]]
+; CHECK-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[T35]], i32 [[T30]]
+; CHECK-NEXT: [[OP_RDX4:%.*]] = icmp slt i32 [[OP_RDX1]], [[OP_RDX3]]
+; CHECK-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
+; CHECK-NEXT: [[OP_RDX6:%.*]] = icmp slt i32 [[OP_RDX5]], [[T25]]
+; CHECK-NEXT: [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[T25]]
+; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX7]]
; CHECK-NEXT: unreachable
;
bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
index 126b3c31ec596..013db32992a04 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
@@ -4,19 +4,32 @@
define i32 @foo(ptr nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[ARR:%.*]], align 4
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0>
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
-; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
-; CHECK-NEXT: ret i32 [[TMP11]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 1
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[A1:%.*]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP0]], [[A2:%.*]]
+; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[TMP0]], [[A3:%.*]]
+; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP0]], [[A4:%.*]]
+; CHECK-NEXT: [[ADD8:%.*]] = add i32 [[TMP0]], [[A5:%.*]]
+; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[TMP0]], [[A6:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARR]], align 4
+; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[TMP1]], [[A7:%.*]]
+; CHECK-NEXT: [[ADD14:%.*]] = add i32 [[TMP1]], [[A8:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[ADD2]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[ADD2]]
+; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], [[ADD4]]
+; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 [[ADD4]]
+; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], [[ADD6]]
+; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 [[ADD6]]
+; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], [[ADD8]]
+; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 [[ADD8]]
+; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], [[ADD10]]
+; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 [[ADD10]]
+; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], [[ADD12]]
+; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 [[ADD12]]
+; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], [[ADD14]]
+; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 [[ADD14]]
+; CHECK-NEXT: ret i32 [[COND44]]
;
entry:
%arrayidx = getelementptr inbounds i32, ptr %arr, i64 1
@@ -50,19 +63,36 @@ entry:
define i32 @foo1(ptr nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
; CHECK-LABEL: @foo1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARR:%.*]], align 4
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 2, i32 1, i32 3, i32 1, i32 1, i32 0, i32 2, i32 1>
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
-; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
-; CHECK-NEXT: ret i32 [[TMP11]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 1
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[A1:%.*]]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 2
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP1]], [[A2:%.*]]
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 3
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[TMP2]], [[A3:%.*]]
+; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP0]], [[A4:%.*]]
+; CHECK-NEXT: [[ADD8:%.*]] = add i32 [[TMP0]], [[A5:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARR]], align 4
+; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[TMP3]], [[A6:%.*]]
+; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[TMP1]], [[A7:%.*]]
+; CHECK-NEXT: [[ADD14:%.*]] = add i32 [[TMP0]], [[A8:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[ADD2]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[ADD2]]
+; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], [[ADD4]]
+; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 [[ADD4]]
+; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], [[ADD6]]
+; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 [[ADD6]]
+; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], [[ADD8]]
+; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 [[ADD8]]
+; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], [[ADD10]]
+; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 [[ADD10]]
+; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], [[ADD12]]
+; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 [[ADD12]]
+; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], [[ADD14]]
+; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 [[ADD14]]
+; CHECK-NEXT: ret i32 [[COND44]]
;
entry:
%arrayidx = getelementptr inbounds i32, ptr %arr, i64 1
@@ -100,19 +130,36 @@ entry:
define i32 @foo2(ptr nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
; CHECK-LABEL: @foo2(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARR:%.*]], align 4
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 2, i32 3, i32 3, i32 0, i32 1, i32 0, i32 2, i32 1>
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
-; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]])
-; CHECK-NEXT: ret i32 [[TMP11]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 3
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[A1:%.*]]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 2
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP1]], [[A2:%.*]]
+; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[TMP0]], [[A3:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARR]], align 4
+; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP2]], [[A4:%.*]]
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 1
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
+; CHECK-NEXT: [[ADD8:%.*]] = add i32 [[TMP3]], [[A5:%.*]]
+; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[TMP2]], [[A6:%.*]]
+; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[TMP1]], [[A7:%.*]]
+; CHECK-NEXT: [[ADD14:%.*]] = add i32 [[TMP3]], [[A8:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[ADD2]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[ADD2]]
+; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], [[ADD4]]
+; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 [[ADD4]]
+; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], [[ADD6]]
+; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 [[ADD6]]
+; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], [[ADD8]]
+; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 [[ADD8]]
+; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], [[ADD10]]
+; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 [[ADD10]]
+; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], [[ADD12]]
+; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 [[ADD12]]
+; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], [[ADD14]]
+; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 [[ADD14]]
+; CHECK-NEXT: ret i32 [[COND44]]
;
entry:
%arrayidx = getelementptr inbounds i32, ptr %arr, i64 3
More information about the llvm-commits
mailing list