[llvm] [SLP] Remove ExtraArgs from reductions. (PR #99923)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 26 08:35:47 PDT 2024
https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/99923
>From 0312a35f9205c3fb90834f0f5db3fcc50c765977 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Mon, 22 Jul 2024 19:25:26 +0000
Subject: [PATCH 1/2] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 99 +++++--------------
.../X86/external-used-across-reductions.ll | 8 +-
.../gather-extractelements-different-bbs.ll | 26 ++---
.../SLPVectorizer/X86/horizontal-minmax.ll | 72 ++++++--------
.../X86/reduced-gathered-vectorized.ll | 22 ++---
...reduction-gather-non-scheduled-extracts.ll | 8 +-
.../SLPVectorizer/X86/reduction-logical.ll | 6 +-
.../vec_list_bias_external_insert_shuffled.ll | 32 +++---
8 files changed, 101 insertions(+), 172 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 667c4eb311c22..258195b8bc297 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8363,6 +8363,12 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
: TTI.getStridedMemoryOpCost(
Instruction::Load, LoadTy, LI->getPointerOperand(),
/*VariableMask=*/false, Alignment, CostKind, LI);
+ // Add external uses costs.
+ for (auto [Idx, V] : enumerate(VL.slice(
+ P.first, std::min<unsigned>(VL.size() - P.first, VF))))
+ if (!R.areAllUsersVectorized(cast<Instruction>(V)))
+ GatherCost += TTI.getVectorInstrCost(Instruction::ExtractElement,
+ LoadTy, CostKind, Idx);
// Estimate GEP cost.
SmallVector<Value *> PointerOps(VF);
for (auto [I, V] : enumerate(VL.slice(P.first, VF)))
@@ -16640,8 +16646,6 @@ class HorizontalReduction {
SmallVector<SmallVector<Value *>> ReducedVals;
/// Maps reduced value to the corresponding reduction operation.
DenseMap<Value *, SmallVector<Instruction *>> ReducedValsToOps;
- // Use map vector to make stable output.
- MapVector<Instruction *, Value *> ExtraArgs;
WeakTrackingVH ReductionRoot;
/// The type of reduction operation.
RecurKind RdxKind;
@@ -16972,14 +16976,12 @@ class HorizontalReduction {
// Iterate through all the operands of the possible reduction tree and
// gather all the reduced values, sorting them by their value id.
- BasicBlock *BB = Root->getParent();
bool IsCmpSelMinMax = isCmpSelMinMax(Root);
SmallVector<Instruction *> Worklist(1, Root);
// Checks if the operands of the \p TreeN instruction are also reduction
// operations or should be treated as reduced values or an extra argument,
// which is not part of the reduction.
auto CheckOperands = [&](Instruction *TreeN,
- SmallVectorImpl<Value *> &ExtraArgs,
SmallVectorImpl<Value *> &PossibleReducedVals,
SmallVectorImpl<Instruction *> &ReductionOps) {
for (int I : reverse(seq<int>(getFirstOperandIndex(TreeN),
@@ -16987,12 +16989,6 @@ class HorizontalReduction {
Value *EdgeVal = getRdxOperand(TreeN, I);
ReducedValsToOps[EdgeVal].push_back(TreeN);
auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
- // Edge has wrong parent - mark as an extra argument.
- if (EdgeInst && !isVectorLikeInstWithConstOps(EdgeInst) &&
- !hasSameParent(EdgeInst, BB)) {
- ExtraArgs.push_back(EdgeVal);
- continue;
- }
// If the edge is not an instruction, or it is different from the main
// reduction opcode or has too many uses - possible reduced value.
// Also, do not try to reduce const values, if the operation is not
@@ -17021,6 +17017,7 @@ class HorizontalReduction {
SmallSet<size_t, 2> LoadKeyUsed;
auto GenerateLoadsSubkey = [&](size_t Key, LoadInst *LI) {
+ Key = hash_combine(hash_value(LI->getParent()), Key);
Value *Ptr = getUnderlyingObject(LI->getPointerOperand());
if (LoadKeyUsed.contains(Key)) {
auto LIt = LoadsMap.find(Ptr);
@@ -17052,39 +17049,22 @@ class HorizontalReduction {
while (!Worklist.empty()) {
Instruction *TreeN = Worklist.pop_back_val();
- SmallVector<Value *> Args;
SmallVector<Value *> PossibleRedVals;
SmallVector<Instruction *> PossibleReductionOps;
- CheckOperands(TreeN, Args, PossibleRedVals, PossibleReductionOps);
- // If too many extra args - mark the instruction itself as a reduction
- // value, not a reduction operation.
- if (Args.size() < 2) {
- addReductionOps(TreeN);
- // Add extra args.
- if (!Args.empty()) {
- assert(Args.size() == 1 && "Expected only single argument.");
- ExtraArgs[TreeN] = Args.front();
- }
- // Add reduction values. The values are sorted for better vectorization
- // results.
- for (Value *V : PossibleRedVals) {
- size_t Key, Idx;
- std::tie(Key, Idx) = generateKeySubkey(V, &TLI, GenerateLoadsSubkey,
- /*AllowAlternate=*/false);
- ++PossibleReducedVals[Key][Idx]
- .insert(std::make_pair(V, 0))
- .first->second;
- }
- Worklist.append(PossibleReductionOps.rbegin(),
- PossibleReductionOps.rend());
- } else {
+ CheckOperands(TreeN, PossibleRedVals, PossibleReductionOps);
+ addReductionOps(TreeN);
+ // Add reduction values. The values are sorted for better vectorization
+ // results.
+ for (Value *V : PossibleRedVals) {
size_t Key, Idx;
- std::tie(Key, Idx) = generateKeySubkey(TreeN, &TLI, GenerateLoadsSubkey,
+ std::tie(Key, Idx) = generateKeySubkey(V, &TLI, GenerateLoadsSubkey,
/*AllowAlternate=*/false);
++PossibleReducedVals[Key][Idx]
- .insert(std::make_pair(TreeN, 0))
+ .insert(std::make_pair(V, 0))
.first->second;
}
+ Worklist.append(PossibleReductionOps.rbegin(),
+ PossibleReductionOps.rend());
}
auto PossibleReducedValsVect = PossibleReducedVals.takeVector();
// Sort values by the total number of values kinds to start the reduction
@@ -17161,18 +17141,9 @@ class HorizontalReduction {
// Track the reduced values in case if they are replaced by extractelement
// because of the vectorization.
- DenseMap<Value *, WeakTrackingVH> TrackedVals(
- ReducedVals.size() * ReducedVals.front().size() + ExtraArgs.size());
- BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
+ DenseMap<Value *, WeakTrackingVH> TrackedVals(ReducedVals.size() *
+ ReducedVals.front().size());
SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
- ExternallyUsedValues.reserve(ExtraArgs.size() + 1);
- // The same extra argument may be used several times, so log each attempt
- // to use it.
- for (const std::pair<Instruction *, Value *> &Pair : ExtraArgs) {
- assert(Pair.first && "DebugLoc must be set.");
- ExternallyUsedValues[Pair.second].push_back(Pair.first);
- TrackedVals.try_emplace(Pair.second, Pair.second);
- }
// The compare instruction of a min/max is the insertion point for new
// instructions and may be replaced with a new compare instruction.
@@ -17211,9 +17182,6 @@ class HorizontalReduction {
any_of(ReductionOps.back(), [](Value *V) {
return isBoolLogicOp(cast<Instruction>(V));
});
- // The reduction root is used as the insertion point for new instructions,
- // so set it as externally used to prevent it from being deleted.
- ExternallyUsedValues[ReductionRoot];
SmallDenseSet<Value *> IgnoreList(ReductionOps.size() *
ReductionOps.front().size());
for (ReductionOpsType &RdxOps : ReductionOps)
@@ -17435,8 +17403,11 @@ class HorizontalReduction {
V.reorderBottomToTop(/*IgnoreReorder=*/true);
// Keep extracted other reduction values, if they are used in the
// vectorization trees.
- BoUpSLP::ExtraValueToDebugLocsMap LocalExternallyUsedValues(
- ExternallyUsedValues);
+ BoUpSLP::ExtraValueToDebugLocsMap LocalExternallyUsedValues;
+ // The reduction root is used as the insertion point for new
+ // instructions, so set it as externally used to prevent it from being
+ // deleted.
+ LocalExternallyUsedValues[ReductionRoot];
for (unsigned Cnt = 0, Sz = ReducedVals.size(); Cnt < Sz; ++Cnt) {
if (Cnt == I || (ShuffledExtracts && Cnt == I - 1))
continue;
@@ -17483,23 +17454,6 @@ class HorizontalReduction {
for (Value *RdxVal : VL)
if (RequiredExtract.contains(RdxVal))
LocalExternallyUsedValues[RdxVal];
- // Update LocalExternallyUsedValues for the scalar, replaced by
- // extractelement instructions.
- DenseMap<Value *, Value *> ReplacementToExternal;
- for (const std::pair<Value *, Value *> &Pair : ReplacedExternals)
- ReplacementToExternal.try_emplace(Pair.second, Pair.first);
- for (const std::pair<Value *, Value *> &Pair : ReplacedExternals) {
- Value *Ext = Pair.first;
- auto RIt = ReplacementToExternal.find(Ext);
- while (RIt != ReplacementToExternal.end()) {
- Ext = RIt->second;
- RIt = ReplacementToExternal.find(Ext);
- }
- auto *It = ExternallyUsedValues.find(Ext);
- if (It == ExternallyUsedValues.end())
- continue;
- LocalExternallyUsedValues[Pair.second].append(It->second);
- }
V.buildExternalUses(LocalExternallyUsedValues);
V.computeMinimumValueSizes();
@@ -17701,11 +17655,6 @@ class HorizontalReduction {
ExtraReductions.emplace_back(RedOp, RdxVal);
}
}
- for (auto &Pair : ExternallyUsedValues) {
- // Add each externally used value to the final reduction.
- for (auto *I : Pair.second)
- ExtraReductions.emplace_back(I, Pair.first);
- }
// Iterate through all not-vectorized reduction values/extra arguments.
bool InitStep = true;
while (ExtraReductions.size() > 1) {
@@ -17857,6 +17806,8 @@ class HorizontalReduction {
assert(IsSupportedHorRdxIdentityOp &&
"The optimization of matched scalar identity horizontal reductions "
"must be supported.");
+ if (Cnt == 1)
+ return VectorizedValue;
switch (RdxKind) {
case RecurKind::Add: {
// res = mul vv, n
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll
index 31ad629160c8d..3d3d00f4a0b3f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll
@@ -13,11 +13,11 @@ define void @test() {
; CHECK-NEXT: [[PHI1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX25:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i64> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[LOOP]] ]
; CHECK-NEXT: [[TMP7:%.*]] = mul <8 x i64> [[TMP6]], <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP1]], <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP7]])
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP1]])
-; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP8]], 2
-; CHECK-NEXT: [[OP_RDX33:%.*]] = add i64 [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[OP_RDX25]] = add i64 [[OP_RDX33]], [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]])
+; CHECK-NEXT: [[OP_RDX16:%.*]] = add i64 [[TMP9]], [[TMP8]]
+; CHECK-NEXT: [[OP_RDX25]] = add i64 [[OP_RDX16]], [[TMP3]]
; CHECK-NEXT: br label [[LOOP]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
index be790b772a2eb..e66cce1b58287 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
@@ -4,30 +4,24 @@
define i32 @foo(i32 %a) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[A:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[A:%.*]]
+; CHECK-NEXT: [[LOCAL:%.*]] = sub nsw i32 0, 0
; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP1]], <i32 1, i32 3>
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1
-; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[OP_RDX10]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[LOCAL]], 3
+; CHECK-NEXT: [[OP_RDX2:%.*]] = add i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[OP_RDX3:%.*]] = add i32 [[OP_RDX2]], 0
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OP_RDX11]], [[BB1]] ], [ 0, [[BB2:%.*]] ]
+; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OP_RDX3]], [[BB1]] ], [ 0, [[BB2:%.*]] ]
; CHECK-NEXT: ret i32 0
; CHECK: bb4:
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[TMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
-; CHECK-NEXT: [[OP_RDX8:%.*]] = add i32 [[TMP9]], 0
-; CHECK-NEXT: [[OP_RDX9:%.*]] = add i32 [[OP_RDX8]], [[TMP3]]
-; CHECK-NEXT: ret i32 [[OP_RDX9]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[LOCAL]], 8
+; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], 0
+; CHECK-NEXT: ret i32 [[OP_RDX1]]
; CHECK: bb5:
; CHECK-NEXT: br label [[BB4:%.*]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
index de06daac7a75d..fa022ad69af79 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
@@ -977,64 +977,54 @@ define i32 @maxi8_wrong_parent(i32) {
; SSE4-LABEL: @maxi8_wrong_parent(
; SSE4-NEXT: [[TMP2:%.*]] = load i32, ptr @arr, align 16
; SSE4-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
-; SSE4-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; SSE4-NEXT: br label [[PP:%.*]]
; SSE4: pp:
-; SSE4-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; SSE4-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
-; SSE4-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
-; SSE4-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
-; SSE4-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
-; SSE4-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
-; SSE4-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
-; SSE4-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
-; SSE4-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
+; SSE4-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; SSE4-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; SSE4-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
+; SSE4-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
+; SSE4-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP7]], [[TMP2]]
+; SSE4-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP7]], i32 [[TMP2]]
+; SSE4-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
+; SSE4-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP5]], i32 [[TMP6]]
; SSE4-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
; SSE4-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
-; SSE4-NEXT: ret i32 [[OP_RDX5]]
+; SSE4-NEXT: [[OP_RDX6:%.*]] = icmp sgt i32 [[OP_RDX5]], [[TMP3]]
+; SSE4-NEXT: [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[TMP3]]
+; SSE4-NEXT: ret i32 [[OP_RDX7]]
;
; AVX-LABEL: @maxi8_wrong_parent(
; AVX-NEXT: [[TMP2:%.*]] = load i32, ptr @arr, align 16
; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
-; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; AVX-NEXT: br label [[PP:%.*]]
; AVX: pp:
-; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
-; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
-; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
-; AVX-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
-; AVX-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
-; AVX-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
-; AVX-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP8]], [[TMP5]]
-; AVX-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP8]], i32 [[TMP5]]
+; AVX-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
+; AVX-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
+; AVX-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP7]], [[TMP2]]
+; AVX-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP7]], i32 [[TMP2]]
+; AVX-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
+; AVX-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP5]], i32 [[TMP6]]
; AVX-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
; AVX-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
-; AVX-NEXT: ret i32 [[OP_RDX5]]
+; AVX-NEXT: [[OP_RDX6:%.*]] = icmp sgt i32 [[OP_RDX5]], [[TMP3]]
+; AVX-NEXT: [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[TMP3]]
+; AVX-NEXT: ret i32 [[OP_RDX7]]
;
; THRESH-LABEL: @maxi8_wrong_parent(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
-; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
-; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
-; THRESH-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
; THRESH-NEXT: br label [[PP:%.*]]
; THRESH: pp:
-; THRESH-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
-; THRESH-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
-; THRESH-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
-; THRESH-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
-; THRESH-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP7]])
-; THRESH-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
-; THRESH-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP9]], i32 1
-; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
-; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP6]], i32 1
-; THRESH-NEXT: [[TMP15:%.*]] = icmp sgt <2 x i32> [[TMP12]], [[TMP14]]
-; THRESH-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP12]], <2 x i32> [[TMP14]]
-; THRESH-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
-; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
-; THRESH-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
-; THRESH-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[TMP17]], i32 [[TMP18]]
-; THRESH-NEXT: ret i32 [[OP_RDX5]]
+; THRESH-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; THRESH-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; THRESH-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; THRESH-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; THRESH-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison>
+; THRESH-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; THRESH-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; THRESH-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP9]])
+; THRESH-NEXT: ret i32 [[TMP10]]
;
%2 = load i32, ptr @arr, align 16
%3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-gathered-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-gathered-vectorized.ll
index 4dea52357e04f..31f0e065cf77d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-gathered-vectorized.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-gathered-vectorized.ll
@@ -6,23 +6,19 @@ define i16 @test() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 5
; CHECK-NEXT: [[A1:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 6
-; CHECK-NEXT: [[A2:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 7
; CHECK-NEXT: br label [[WHILE:%.*]]
; CHECK: while:
-; CHECK-NEXT: [[PH:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX5:%.*]], [[WHILE]] ]
+; CHECK-NEXT: [[PH:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX26:%.*]], [[WHILE]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr null, align 8
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A1]], align 16
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[A2]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr null, align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[A]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i64> [[TMP5]], i64 [[TMP0]], i32 2
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i64> [[TMP6]], i64 [[TMP1]], i32 3
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 4, i32 5, i32 8, i32 8>
-; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> [[TMP10]])
-; CHECK-NEXT: [[OP_RDX5]] = xor i64 [[TMP3]], [[TMP11]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr null, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> [[TMP4]])
+; CHECK-NEXT: [[OP_RDX:%.*]] = xor i64 0, [[TMP2]]
+; CHECK-NEXT: [[OP_RDX24:%.*]] = xor i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[OP_RDX25:%.*]] = xor i64 [[OP_RDX]], [[OP_RDX24]]
+; CHECK-NEXT: [[OP_RDX26]] = xor i64 [[OP_RDX25]], [[TMP5]]
; CHECK-NEXT: br label [[WHILE]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll
index f032d4b6ecd45..e8abccea6b0f4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll
@@ -7,15 +7,13 @@ define void @tes() {
; CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <2 x double> zeroinitializer, zeroinitializer
; CHECK-NEXT: br label [[TMP1:%.*]]
; CHECK: 1:
-; CHECK-NEXT: [[TMP2:%.*]] = select i1 false, i1 false, i1 false
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i1> zeroinitializer, <2 x i1> [[TMP0]], <4 x i32> <i32 0, i32 0, i32 0, i32 2>
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP3]])
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP4]], i1 false
-; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP2]], i1 [[OP_RDX]], i1 false
-; CHECK-NEXT: br i1 [[OP_RDX1]], label [[TMP5:%.*]], label [[TMP6:%.*]]
-; CHECK: 5:
+; CHECK-NEXT: br i1 [[OP_RDX]], label [[TMP6:%.*]], label [[TMP5:%.*]]
+; CHECK: 4:
; CHECK-NEXT: ret void
-; CHECK: 6:
+; CHECK: 5:
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
index 838a75dcd29e0..c25e07cfd451f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
@@ -436,10 +436,9 @@ define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) {
define i1 @logical_and_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
; CHECK-LABEL: @logical_and_icmp_extra_op(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[S3:%.*]] = select i1 [[C:%.*]], i1 [[C]], i1 false
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
-; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[S3]], i1 [[TMP3]], i1 false
+; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[C:%.*]], i1 false
; CHECK-NEXT: ret i1 [[OP_RDX]]
;
%x0 = extractelement <4 x i32> %x, i32 0
@@ -465,10 +464,9 @@ define i1 @logical_and_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
; CHECK-LABEL: @logical_or_icmp_extra_op(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[S3:%.*]] = select i1 [[C:%.*]], i1 true, i1 [[C]]
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
-; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[S3]], i1 true, i1 [[TMP3]]
+; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 true, i1 [[C:%.*]]
; CHECK-NEXT: ret i1 [[OP_RDX]]
;
%x0 = extractelement <4 x i32> %x, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll
index 8f1d7a11e1509..69ecf1852aedd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll
@@ -7,9 +7,11 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 7
; CHECK-NEXT: [[T5:%.*]] = load i32, ptr [[T4]], align 4
; CHECK-NEXT: [[T8:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 1
+; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[T8]], align 4
; CHECK-NEXT: [[T10:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 6
; CHECK-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4
+; CHECK-NEXT: [[T14:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 2
+; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4
; CHECK-NEXT: [[T16:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 5
; CHECK-NEXT: [[T17:%.*]] = load i32, ptr [[T16]], align 4
; CHECK-NEXT: [[T20:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 3
@@ -19,11 +21,10 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T24:%.*]] = add nsw i32 [[T23]], [[T21]]
; CHECK-NEXT: [[T25:%.*]] = sub nsw i32 [[T21]], [[T23]]
; CHECK-NEXT: [[T27:%.*]] = sub nsw i32 [[T3]], [[T24]]
-; CHECK-NEXT: [[T9:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
-; CHECK-NEXT: [[T15:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]]
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]
; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433
+; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270
; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137
; CHECK-NEXT: [[T37:%.*]] = add nsw i32 [[T25]], [[T11]]
; CHECK-NEXT: [[T38:%.*]] = add nsw i32 [[T17]], [[T5]]
@@ -33,19 +34,20 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819
; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069
; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[T27]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[T47]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> <i32 poison, i32 poison, i32 6270, i32 poison>, <4 x i32> <i32 1, i32 0, i32 6, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T40]], i32 3
-; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = mul nsw <4 x i32> [[TMP6]], [[TMP8]]
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[T50:%.*]] = add nsw i32 [[T40]], [[T48]]
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 poison, i32 poison, i32 3>
-; CHECK-NEXT: [[T701:%.*]] = insertelement <8 x i32> [[TMP12]], i32 [[T50]], i32 5
+; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T15]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T40]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T9]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[T48]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2
+; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
-; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7
+; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4
; CHECK-NEXT: ret void
;
>From 5dad9439961fd85167ed0c69a6a72208b4e2c1ca Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Mon, 22 Jul 2024 19:32:09 +0000
Subject: [PATCH 2/2] Fix formatting
Created using spr 1.3.5
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 258195b8bc297..b1a91ef209c16 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17178,10 +17178,9 @@ class HorizontalReduction {
// Initialize the final value in the reduction.
return Res;
};
- bool AnyBoolLogicOp =
- any_of(ReductionOps.back(), [](Value *V) {
- return isBoolLogicOp(cast<Instruction>(V));
- });
+ bool AnyBoolLogicOp = any_of(ReductionOps.back(), [](Value *V) {
+ return isBoolLogicOp(cast<Instruction>(V));
+ });
SmallDenseSet<Value *> IgnoreList(ReductionOps.size() *
ReductionOps.front().size());
for (ReductionOpsType &RdxOps : ReductionOps)
More information about the llvm-commits mailing list