[llvm] [AArch64][CostModel] Consider the cost of const vector (PR #117539)
Sushant Gokhale via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 15 22:41:12 PST 2024
https://github.com/sushgokh updated https://github.com/llvm/llvm-project/pull/117539
From 7b4b39c1f836c2d01a6aba886eb4ae98a59de0e0 Mon Sep 17 00:00:00 2001
From: sgokhale <sgokhale at nvidia.com>
Date: Mon, 25 Nov 2024 16:14:17 +0530
Subject: [PATCH] [AArch64][CostModel] Consider the cost of const vector
Currently, the cost of a constant vector is considered to be zero. Consider the example below:
```
%2 = fadd <2 x float> %1, <float 21.0, float 22.0>
```
Here, the cost of the constant vector `<float 21.0, float 22.0>` is considered zero.
However, that is not always accurate: on AArch64, materializing such a constant typically results in an `adrp + ldr` pair.
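For illustration, here is roughly what that materialization looks like; the function, register choices, and literal-pool label below are assumptions for the sketch, not output taken from this patch:
```
define <2 x float> @add_const(<2 x float> %x) {
  %r = fadd <2 x float> %x, <float 21.0, float 22.0>
  ret <2 x float> %r
}
; Typical AArch64 lowering of the non-splat constant operand:
;   adrp x8, .LCPI0_0                  ; address of the literal-pool entry
;   ldr  d1, [x8, :lo12:.LCPI0_0]      ; load <21.0, 22.0>
;   fadd v0.2s, v0.2s, v1.2s
;   ret
```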
This patch alters the AArch64 cost model to account for the cost of materializing constant vectors.
Performance results (tested with `-mcpu=neoverse-v2`; uplift indicated by a + sign):
541.leela +(2.2 - 3)%
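As additional context for the cost values used by the new AArch64 hook in this patch: with SVE available, a two-element integer constant whose start value and step both fit in the `index` immediate range [-16, 15] is modeled as cheaper than a literal-pool load. A hedged IR illustration (the snippet itself is an assumption, not taken from the patch):
```
; Start value 1 and step 1 both lie in [-16, 15], so the patch models this
; constant as cost 2 (an SVE `index` can synthesize it, e.g. index z0.s, #1, #1).
; Constants outside that range are modeled as mov + index (cost 4); without
; SVE the constant becomes an adrp + ldr literal-pool load (cost 4).
%sum = add <2 x i32> %x, <i32 1, i32 2>
```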
---
.../llvm/Analysis/TargetTransformInfo.h | 23 ++++
.../llvm/Analysis/TargetTransformInfoImpl.h | 9 ++
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 18 ++-
.../llvm/Transforms/Vectorize/SLPVectorizer.h | 7 ++
llvm/lib/Analysis/TargetTransformInfo.cpp | 8 ++
.../AArch64/AArch64TargetTransformInfo.cpp | 117 +++++++++++++++++-
.../AArch64/AArch64TargetTransformInfo.h | 17 ++-
.../Transforms/Vectorize/SLPVectorizer.cpp | 46 ++++++-
.../AArch64/memory-runtime-checks.ll | 18 ++-
.../SLPVectorizer/AArch64/vec3-base.ll | 11 +-
10 files changed, 246 insertions(+), 28 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 985ca1532e0149..46bacd6c703eb1 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1449,6 +1449,14 @@ class TargetTransformInfo {
const APInt &DemandedDstElts,
TTI::TargetCostKind CostKind) const;
+ /// \return Cost of materializing a constant.
+ InstructionCost getConstantMaterializationCost(
+ ArrayRef<Constant *> VL, Type *SrcTy,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ ArrayRef<SmallVector<Constant *>> ConstVectsPerTree = {},
+ ArrayRef<SmallVector<Constant *>> MaterializedConstVectsPerFunc = {})
+ const;
+
/// \return The cost of Load and Store instructions.
InstructionCost
getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
@@ -2147,6 +2155,12 @@ class TargetTransformInfo::Concept {
const APInt &DemandedDstElts,
TTI::TargetCostKind CostKind) = 0;
+ virtual InstructionCost getConstantMaterializationCost(
+ ArrayRef<Constant *> VL, Type *SrcTy,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ ArrayRef<SmallVector<Constant *>> ConstVectsPerTree = {},
+ ArrayRef<SmallVector<Constant *>> MaterializedConstVectsPerFunc = {}) = 0;
+
virtual InstructionCost
getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -2850,6 +2864,15 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
DemandedDstElts, CostKind);
}
+ InstructionCost getConstantMaterializationCost(
+ ArrayRef<Constant *> VL, Type *SrcTy,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ ArrayRef<SmallVector<Constant *>> ConstVectsPerTree = {},
+ ArrayRef<SmallVector<Constant *>> MaterializedConstVectsPerFunc = {})
+ override {
+ return Impl.getConstantMaterializationCost(
+ VL, SrcTy, CostKind, ConstVectsPerTree, MaterializedConstVectsPerFunc);
+ }
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 38aba183f6a173..9151389a052ab2 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -730,6 +730,15 @@ class TargetTransformInfoImplBase {
return 1;
}
+ InstructionCost getConstantMaterializationCost(
+ ArrayRef<Constant *> VL, Type *SrcTy,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ ArrayRef<SmallVector<Constant *>> ConstVectsPerTree = {},
+ ArrayRef<SmallVector<Constant *>> MaterializedConstVectsPerFunc = {})
+ const {
+ return 0;
+ }
+
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index d2fc40d8ae037e..9d69fb6e49de4d 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1368,11 +1368,19 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return Cost;
}
- InstructionCost
- getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
- unsigned AddressSpace, TTI::TargetCostKind CostKind,
- TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
- const Instruction *I = nullptr) {
+ InstructionCost getConstantMaterializationCost(
+ ArrayRef<Constant *> VL, Type *SrcTy,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ ArrayRef<SmallVector<Constant *>> ConstVectsPerTree = {},
+ ArrayRef<SmallVector<Constant *>> MaterializedConstVectsPerFunc = {}) {
+ return 0;
+ }
+
+ InstructionCost getMemoryOpCost(
+ unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
+ const Instruction *I = nullptr) {
assert(!Src->isVoidTy() && "Invalid type");
// Assume types, such as structs, are expensive.
if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index 877c83291170bf..8e45cdba10f0e2 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
@@ -81,6 +82,12 @@ struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_,
OptimizationRemarkEmitter *ORE_);
+ // Store constant vector(s) used in the vectorized tree. This helps in
+ // avoiding counting the constant vector cost twice if it has already been
+ // materialized.
+ static inline SmallVector<SmallVector<Constant *>>
+ MaterializedConstVectsPerFunc = {};
+
private:
/// Collect store and getelementptr instructions and organize them
/// according to the underlying object of their pointer operands. We sort the
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 1fb2b9836de0cc..7e34c788ea73bc 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1099,6 +1099,14 @@ InstructionCost TargetTransformInfo::getReplicationShuffleCost(
return Cost;
}
+InstructionCost TargetTransformInfo::getConstantMaterializationCost(
+ ArrayRef<Constant *> VL, Type *SrcTy, TTI::TargetCostKind CostKind,
+ ArrayRef<SmallVector<Constant *>> ConstVectsPerTree,
+ ArrayRef<SmallVector<Constant *>> MaterializedConstVectsPerFunc) const {
+ return TTIImpl->getConstantMaterializationCost(
+ VL, SrcTy, CostKind, ConstVectsPerTree, MaterializedConstVectsPerFunc);
+}
+
InstructionCost TargetTransformInfo::getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7a1e401bca18cb..1b34ce7453cd1f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3785,12 +3785,116 @@ bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
}
-InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
- MaybeAlign Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- TTI::OperandValueInfo OpInfo,
- const Instruction *I) {
+template <typename T>
+static bool HaveIdenticalVectVals(ArrayRef<Constant *> A,
+ ArrayRef<Constant *> B) {
+ auto R = zip(A, B);
+ return all_of(R, [&](std::tuple<Constant *, Constant *> P) {
+ return cast<T>(get<0>(P))->getValue() == cast<T>(get<1>(P))->getValue();
+ });
+}
+
+template <typename T>
+static bool HaveIdenticalVectTy(ArrayRef<Constant *> A,
+ ArrayRef<Constant *> B) {
+ auto R1 = all_of(A, [&](Constant *C) { return isa<T>(C); });
+ auto R2 = all_of(B, [&](Constant *C) { return isa<T>(C); });
+ return R1 & R2;
+}
+
+template <typename T>
+static bool AreIdenticalVects(ArrayRef<Constant *> A, ArrayRef<Constant *> B) {
+ if (A.empty() || B.empty() || !HaveIdenticalVectTy<T>(A, B))
+ return false;
+ return HaveIdenticalVectVals<T>(A, B);
+}
+
+InstructionCost AArch64TTIImpl::getConstantMaterializationCost(
+ ArrayRef<Constant *> VL, Type *SrcTy, TTI::TargetCostKind CostKind,
+ ArrayRef<SmallVector<Constant *>> ConstVectsPerTree,
+ ArrayRef<SmallVector<Constant *>> MaterializedConstVectsPerFunc) {
+ // Compute the scalar cost.
+ InstructionCost Cost;
+ if (!SrcTy->isVectorTy()) {
+ // FIXME: Consider floating point types as well.
+ auto *C = dyn_cast<ConstantInt>(VL[0]);
+ return C ? getIntImmCost(C->getValue(), C->getType(), CostKind)
+ : InstructionCost::getInvalid();
+ } else // Vector cost.
+ {
+ // FIXME: Consider floating point types as well.
+ if (!all_of(VL, IsaPred<ConstantInt>))
+ return InstructionCost::getInvalid();
+
+ auto isSplat = [](ArrayRef<Constant *> VL) {
+ Value *FirstNonUndef = nullptr;
+ for (Value *V : VL) {
+ if (isa<UndefValue>(V))
+ continue;
+ if (!FirstNonUndef) {
+ FirstNonUndef = V;
+ continue;
+ }
+ if (V != FirstNonUndef)
+ return false;
+ }
+ return FirstNonUndef != nullptr;
+ };
+ // FIXME: Calculate cost of scalar realization + broadcast.
+ if (isSplat(VL) || all_of(VL, IsaPred<UndefValue, PoisonValue>))
+ return InstructionCost::getInvalid();
+ // Check if this VL has already been materialized in the function.
+ for (auto &V : MaterializedConstVectsPerFunc) {
+ if (AreIdenticalVects<ConstantInt>(V, VL))
+ return 0;
+ }
+ // Check if this VL has already been seen in the SLP tree.
+ for (auto &V : ConstVectsPerTree) {
+ if (AreIdenticalVects<ConstantInt>(V, VL))
+ return 0;
+ }
+ auto *EltTy = VL[0]->getType();
+ auto *VTy = FixedVectorType::get(EltTy, VL.size());
+ auto LT = getTypeLegalizationCost(VTy);
+ // FIXME: Consider types with more legalization cost.
+ if (LT.first > 1)
+ return InstructionCost::getInvalid();
+ if (useNeonVector(VTy)) {
+ if (ST->hasSVE()) {
+ auto Elts = VTy->getNumElements();
+
+ // `index` instruction can be emitted for Elts > 2. We can't analyze
+ // this.
+ if (Elts > 2)
+ return InstructionCost::getInvalid();
+
+ // Check if all the constants are in the range -16 to 15. If not, this
+ // results in scalar/immediate index instruction.
+ auto FirstVal = cast<ConstantInt>(VL[0])->getSExtValue();
+ auto SecondVal = cast<ConstantInt>(VL[1])->getSExtValue();
+ auto IsInIndexInstrImmRange = [](int64_t Val) {
+ return Val >= -16 && Val <= 15;
+ };
+ if (IsInIndexInstrImmRange(FirstVal) &&
+ IsInIndexInstrImmRange(std::abs(SecondVal - FirstVal)))
+ Cost = 2;
+ else // It's mov + index.
+ Cost = 4;
+ } else {
+ // This results in `adrp + ldr` instruction.
+ Cost = 4;
+ }
+ } else {
+ Cost = InstructionCost::getInvalid();
+ }
+ return Cost;
+ }
+}
+
+InstructionCost AArch64TTIImpl::getMemoryOpCost(
+ unsigned Opcode, Type *Ty, MaybeAlign Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo,
+ const Instruction *I, InstructionCost ConstVectScalarCost) {
EVT VT = TLI->getValueType(DL, Ty, true);
// Type legalization can't handle structs
if (VT == MVT::Other)
@@ -3845,6 +3949,7 @@ InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
// Otherwise we need to scalarize.
return cast<FixedVectorType>(Ty)->getNumElements() * 2;
}
+
EVT EltVT = VT.getVectorElementType();
unsigned EltSize = EltVT.getScalarSizeInBits();
if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 201bc831b816b3..216fcbc7a4f63f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -234,11 +234,18 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
bool IsZeroCmp) const;
bool useNeonVector(const Type *Ty) const;
- InstructionCost
- getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
- unsigned AddressSpace, TTI::TargetCostKind CostKind,
- TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
- const Instruction *I = nullptr);
+ InstructionCost getConstantMaterializationCost(
+ ArrayRef<Constant *> VL, Type *SrcTy,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ ArrayRef<SmallVector<Constant *>> ConstVectsPerTree = {},
+ ArrayRef<SmallVector<Constant *>> MaterializedConstVectsPerFunc = {});
+
+ InstructionCost getMemoryOpCost(
+ unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
+ const Instruction *I = nullptr,
+ InstructionCost ConstVectScalarCost = InstructionCost::getInvalid());
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d033b7c2ef4a92..094a372389d5cb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1314,6 +1314,10 @@ class BoUpSLP {
class ShuffleInstructionBuilder;
public:
+ /// List of const vectors per tree. If the tree is vectorized, only then we
+ /// copy the data from tree list to function list.
+ SmallVector<SmallVector<Constant *>> ConstVectsPerTree = {};
+
/// Tracks the state we can represent the loads in the given sequence.
enum class LoadsState {
Gather,
@@ -1451,6 +1455,7 @@ class BoUpSLP {
void deleteTree() {
VectorizableTree.clear();
ScalarToTreeEntry.clear();
+ ConstVectsPerTree.clear();
MultiNodeScalars.clear();
MustGather.clear();
NonScheduledFirst.clear();
@@ -10054,9 +10059,33 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root) {
- if ((!Root && allConstant(VL)) || all_of(VL, IsaPred<UndefValue>))
+ if (all_of(VL, IsaPred<UndefValue, PoisonValue>))
return TTI::TCC_Free;
auto *VecTy = getWidenedType(ScalarTy, VL.size());
+ if ((!Root && allConstant(VL))) {
+ if (isSplat(VL))
+ return TTI::TCC_Free;
+
+ InstructionCost ScalarCost;
+ SmallVector<Constant *> ConstVL;
+ for (auto *V : VL) {
+ ConstVL.clear();
+ ConstVL.emplace_back(cast<Constant>(V));
+ ScalarCost += TTI.getConstantMaterializationCost(ConstVL, V->getType());
+ }
+
+ for (auto *V : VL)
+ ConstVL.emplace_back(cast<Constant>(V));
+ InstructionCost VectorCost = TTI.getConstantMaterializationCost(
+ ConstVL, VecTy, CostKind, R.ConstVectsPerTree,
+ SLPVectorizerPass::MaterializedConstVectsPerFunc);
+ R.ConstVectsPerTree.emplace_back(ConstVL);
+
+ // Bail out for now to avoid any regressions.
+ if (!VectorCost.isValid() || !ScalarCost.isValid())
+ return 0;
+ return VectorCost - ScalarCost;
+ }
InstructionCost GatherCost = 0;
SmallVector<Value *> Gathers(VL);
if (!Root && isSplat(VL)) {
@@ -11019,8 +11048,6 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
if (E->isGather()) {
- if (allConstant(VL))
- return 0;
if (isa<InsertElementInst>(VL[0]))
return InstructionCost::getInvalid();
if (isa<CmpInst>(VL.front()))
@@ -18294,6 +18321,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
Stores.clear();
GEPs.clear();
+ MaterializedConstVectsPerFunc.clear();
bool Changed = false;
// If the target claims to have no vector registers don't attempt
@@ -18410,6 +18438,8 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
}
if (R.isLoadCombineCandidate(Chain))
return true;
+
+ R.ConstVectsPerTree.clear();
R.buildTree(Chain);
// Check if tree tiny and store itself or its value is not vectorized.
if (R.isTreeTinyAndNotFullyVectorizable()) {
@@ -18444,6 +18474,8 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
<< NV("TreeSize", R.getTreeSize()));
R.vectorizeTree();
+ for (auto &V : R.ConstVectsPerTree)
+ MaterializedConstVectsPerFunc.emplace_back(V);
return true;
}
@@ -19007,7 +19039,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
LLVM_DEBUG(dbgs() << "SLP: Analyzing " << ActualVF << " operations "
<< "\n");
-
+ R.ConstVectsPerTree.clear();
R.buildTree(Ops);
if (R.isTreeTinyAndNotFullyVectorizable())
continue;
@@ -19038,6 +19070,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
I += VF - 1;
NextInst = I + 1;
Changed = true;
+ for (auto &V : R.ConstVectsPerTree)
+ MaterializedConstVectsPerFunc.emplace_back(V);
}
}
}
@@ -19928,6 +19962,7 @@ class HorizontalReduction {
return V.isDeleted(RedValI);
}))
break;
+ V.ConstVectsPerTree.clear();
V.buildTree(VL, IgnoreList);
if (V.isTreeTinyAndNotFullyVectorizable(/*ForReduction=*/true)) {
if (!AdjustReducedVals())
@@ -20041,7 +20076,8 @@ class HorizontalReduction {
}
continue;
}
-
+ for (auto &L : V.ConstVectsPerTree)
+ SLPVectorizerPass::MaterializedConstVectsPerFunc.emplace_back(L);
LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
<< Cost << ". (HorRdx)\n");
V.getORE()->emit([&]() {
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
index 9f5744b17cb79e..929fb29a4a6790 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
@@ -600,15 +600,27 @@ bb15: ; preds = %bb15, %bb14
define void @test_bounds_removed_before_runtime_checks(ptr %A, ptr %B, i1 %c) {
; CHECK-LABEL: @test_bounds_removed_before_runtime_checks(
; CHECK-NEXT: entry:
-; CHECK-NEXT: store <2 x i32> <i32 10, i32 300>, ptr [[A:%.*]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = fmul float 1.000000e+01, 2.000000e+01
+; CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = fmul float 3.000000e+01, 2.000000e+01
+; CHECK-NEXT: [[TMP4:%.*]] = fptosi float [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 100, [[TMP2]]
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP2]], i32 10
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 false, i32 0, i32 [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 200, [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP4]], i32 300
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 false, i32 0, i32 [[TMP9]]
+; CHECK-NEXT: store i32 [[TMP7]], ptr [[A:%.*]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT: store i32 [[TMP10]], ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B:%.*]], align 8
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB23:%.*]], label [[BB14:%.*]]
; CHECK: bb14:
-; CHECK-NEXT: [[TMP15:%.*]] = sext i32 10 to i64
+; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP7]] to i64
; CHECK-NEXT: [[TMP16:%.*]] = add nsw i64 2, [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i64 3
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT:%.*]], ptr [[A]], i64 0, i32 2
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT]], ptr [[A]], i64 0, i32 2
; CHECK-NEXT: store float 0.000000e+00, ptr [[TMP20]], align 8
; CHECK-NEXT: [[TMP21:%.*]] = load i8, ptr [[TMP19]], align 1
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT]], ptr [[A]], i64 0, i32 3
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
index feb4ad865f3147..289ad5f62c966f 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
@@ -279,10 +279,13 @@ define void @phi_store3(ptr %dst) {
; POW2-ONLY: invoke.cont8.loopexit:
; POW2-ONLY-NEXT: br label [[EXIT]]
; POW2-ONLY: exit:
-; POW2-ONLY-NEXT: [[P_2:%.*]] = phi i32 [ 3, [[ENTRY:%.*]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT:%.*]] ]
-; POW2-ONLY-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[ENTRY]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT]] ]
-; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 2
-; POW2-ONLY-NEXT: store <2 x i32> [[TMP0]], ptr [[DST]], align 4
+; POW2-ONLY-NEXT: [[P_0:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT:%.*]] ]
+; POW2-ONLY-NEXT: [[P_1:%.*]] = phi i32 [ 2, [[ENTRY]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT]] ]
+; POW2-ONLY-NEXT: [[P_2:%.*]] = phi i32 [ 3, [[ENTRY]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT]] ]
+; POW2-ONLY-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
+; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
+; POW2-ONLY-NEXT: store i32 [[P_0]], ptr [[DST]], align 4
+; POW2-ONLY-NEXT: store i32 [[P_1]], ptr [[DST_1]], align 4
; POW2-ONLY-NEXT: store i32 [[P_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT: ret void
;