[llvm] d9715a7 - [SCEV] Don't try to reuse expressions with offset
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 25 00:20:43 PST 2022
Author: Nikita Popov
Date: 2022-02-25T09:16:48+01:00
New Revision: d9715a726674046b177221873d63578dce383feb
URL: https://github.com/llvm/llvm-project/commit/d9715a726674046b177221873d63578dce383feb
DIFF: https://github.com/llvm/llvm-project/commit/d9715a726674046b177221873d63578dce383feb.diff
LOG: [SCEV] Don't try to reuse expressions with offset
SCEV's ExprValueMap currently tracks not only which IR Values
correspond to a given SCEV expression, but additionally stores that
it may be expanded in the form X+Offset. In theory, this allows
reusing existing IR Values in more cases.
In practice, this doesn't seem to be particularly useful (the test
changes are rather underwhelming) and adds a good bit of complexity.
Per https://github.com/llvm/llvm-project/issues/53905, we have an
invalidation issue with these offset expressions.
Differential Revision: https://reviews.llvm.org/D120311
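For context, the doc comment deleted below spelled the scheme out: if
S1 = S2 + C_a has already been expanded to the IR value V1, then
S3 = S2 + C_b could be emitted as V1 - C_a + C_b instead of expanding S2
again. A minimal standalone C++ sketch of that arithmetic, with plain
integers standing in for SCEVs and IR values (illustrative only, not
LLVM code):

#include <cassert>

int main() {
  // Pretend S2 is some complex subexpression whose value is:
  int S2 = 42;
  const int C_a = 10, C_b = 20;

  int V1 = S2 + C_a; // S1 = S2 + C_a, already materialized as V1.

  // Old behavior: reuse V1 to build S3 = S2 + C_b without re-expanding S2.
  int S3_reused = V1 - C_a + C_b;

  // After this patch, S3 is simply expanded literally; the {V1, C_a}
  // offset entry that enabled the reuse is no longer recorded.
  int S3_literal = S2 + C_b;
  assert(S3_reused == S3_literal);
}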
Added:
Modified:
llvm/include/llvm/Analysis/ScalarEvolution.h
llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll
llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 768925433bed..61659bfbd126 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1246,30 +1246,11 @@ class ScalarEvolution {
HasRecMapType HasRecMap;
/// The type for ExprValueMap.
- using ValueOffsetPair = std::pair<Value *, ConstantInt *>;
- using ValueOffsetPairSetVector = SmallSetVector<ValueOffsetPair, 4>;
- using ExprValueMapType = DenseMap<const SCEV *, ValueOffsetPairSetVector>;
+ using ValueSetVector = SmallSetVector<Value *, 4>;
+ using ExprValueMapType = DenseMap<const SCEV *, ValueSetVector>;
/// ExprValueMap -- This map records the original values from which
/// the SCEV expr is generated from.
- ///
- /// We want to represent the mapping as SCEV -> ValueOffsetPair instead
- /// of SCEV -> Value:
- /// Suppose we know S1 expands to V1, and
- /// S1 = S2 + C_a
- /// S3 = S2 + C_b
- /// where C_a and C_b are different SCEVConstants. Then we'd like to
- /// expand S3 as V1 - C_a + C_b instead of expanding S2 literally.
- /// It is helpful when S2 is a complex SCEV expr.
- ///
- /// In order to do that, we represent ExprValueMap as a mapping from
- /// SCEV to ValueOffsetPair. We will save both S1->{V1, 0} and
- /// S2->{V1, C_a} into the map when we create SCEV for V1. When S3
- /// is expanded, it will first expand S2 to V1 - C_a because of
- /// S2->{V1, C_a} in the map, then expand S3 to V1 - C_a + C_b.
- ///
- /// Note: S->{V, Offset} in the ExprValueMap means S can be expanded
- /// to V - Offset.
ExprValueMapType ExprValueMap;
/// The type for ValueExprMap.
@@ -1300,7 +1281,7 @@ class ScalarEvolution {
DenseMap<const SCEV *, uint32_t> MinTrailingZerosCache;
/// Return the Value set from which the SCEV expr is generated.
- ValueOffsetPairSetVector *getSCEVValues(const SCEV *S);
+ ValueSetVector *getSCEVValues(const SCEV *S);
/// Private helper method for the GetMinTrailingZeros method
uint32_t GetMinTrailingZerosImpl(const SCEV *S);
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 60b772b94a6f..5a9ed598e099 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -385,8 +385,8 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
/// Note that this function does not perform an exhaustive search. I.e if it
/// didn't find any value it does not mean that there is no such value.
///
- Optional<ScalarEvolution::ValueOffsetPair>
- getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L);
+ Value *getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
+ Loop *L);
/// Returns a suitable insert point after \p I, that dominates \p
/// MustDominate. Skips instructions inserted by the expander.
@@ -444,8 +444,7 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
Value *expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, Value *V);
/// Find a previous Value in ExprValueMap for expand.
- ScalarEvolution::ValueOffsetPair
- FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
+ Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
Value *expand(const SCEV *S);
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 24fb780d0f7e..05ee7da88609 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4278,27 +4278,9 @@ bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
return FoundAddRec;
}
-/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
-/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an
-/// offset I, then return {S', I}, else return {\p S, nullptr}.
-static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
- const auto *Add = dyn_cast<SCEVAddExpr>(S);
- if (!Add)
- return {S, nullptr};
-
- if (Add->getNumOperands() != 2)
- return {S, nullptr};
-
- auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
- if (!ConstOp)
- return {S, nullptr};
-
- return {Add->getOperand(1), ConstOp->getValue()};
-}
-
/// Return the ValueOffsetPair set for \p S. \p S can be represented
/// by the value and offset from any ValueOffsetPair in the set.
-ScalarEvolution::ValueOffsetPairSetVector *
+ScalarEvolution::ValueSetVector *
ScalarEvolution::getSCEVValues(const SCEV *S) {
ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
if (SI == ExprValueMap.end())
@@ -4306,8 +4288,8 @@ ScalarEvolution::getSCEVValues(const SCEV *S) {
#ifndef NDEBUG
if (VerifySCEVMap) {
// Check there is no dangling Value in the set returned.
- for (const auto &VE : SI->second)
- assert(ValueExprMap.count(VE.first));
+ for (Value *V : SI->second)
+ assert(ValueExprMap.count(V));
}
#endif
return &SI->second;
@@ -4320,18 +4302,9 @@ void ScalarEvolution::eraseValueFromMap(Value *V) {
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
- // Remove {V, 0} from the set of ExprValueMap[S]
+ // Remove V from the set of ExprValueMap[S]
if (auto *SV = getSCEVValues(S))
- SV->remove({V, nullptr});
-
- // Remove {V, Offset} from the set of ExprValueMap[Stripped]
- const SCEV *Stripped;
- ConstantInt *Offset;
- std::tie(Stripped, Offset) = splitAddExpr(S);
- if (Offset != nullptr) {
- if (auto *SV = getSCEVValues(Stripped))
- SV->remove({V, Offset});
- }
+ SV->remove(V);
ValueExprMap.erase(V);
}
}
@@ -4343,7 +4316,7 @@ void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) {
auto It = ValueExprMap.find_as(V);
if (It == ValueExprMap.end()) {
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
- ExprValueMap[S].insert({V, nullptr});
+ ExprValueMap[S].insert(V);
}
}
@@ -4360,23 +4333,8 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {
// ValueExprMap before insert S->{V, 0} into ExprValueMap.
std::pair<ValueExprMapType::iterator, bool> Pair =
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
- if (Pair.second) {
- ExprValueMap[S].insert({V, nullptr});
-
- // If S == Stripped + Offset, add Stripped -> {V, Offset} into
- // ExprValueMap.
- const SCEV *Stripped = S;
- ConstantInt *Offset = nullptr;
- std::tie(Stripped, Offset) = splitAddExpr(S);
- // If stripped is SCEVUnknown, don't bother to save
- // Stripped -> {V, offset}. It doesn't simplify and sometimes even
- // increase the complexity of the expansion code.
- // If V is GetElementPtrInst, don't save Stripped -> {V, offset}
- // because it may generate add/sub instead of GEP in SCEV expansion.
- if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) &&
- !isa<GetElementPtrInst>(V))
- ExprValueMap[Stripped].insert({V, Offset});
- }
+ if (Pair.second)
+ ExprValueMap[S].insert(V);
}
return S;
}
@@ -13399,12 +13357,10 @@ void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
auto ExprIt = ExprValueMap.find(S);
if (ExprIt != ExprValueMap.end()) {
- for (auto &ValueAndOffset : ExprIt->second) {
- if (ValueAndOffset.second == nullptr) {
- auto ValueIt = ValueExprMap.find_as(ValueAndOffset.first);
- if (ValueIt != ValueExprMap.end())
- ValueExprMap.erase(ValueIt);
- }
+ for (Value *V : ExprIt->second) {
+ auto ValueIt = ValueExprMap.find_as(V);
+ if (ValueIt != ValueExprMap.end())
+ ValueExprMap.erase(ValueIt);
}
ExprValueMap.erase(ExprIt);
}
@@ -13546,7 +13502,7 @@ void ScalarEvolution::verify() const {
// Check that the value is also part of the reverse map.
auto It = ExprValueMap.find(KV.second);
- if (It == ExprValueMap.end() || !It->second.contains({KV.first, nullptr})) {
+ if (It == ExprValueMap.end() || !It->second.contains(KV.first)) {
dbgs() << "Value " << *KV.first
<< " is in ValueExprMap but not in ExprValueMap\n";
std::abort();
@@ -13554,19 +13510,15 @@ void ScalarEvolution::verify() const {
}
for (const auto &KV : ExprValueMap) {
- for (const auto &ValueAndOffset : KV.second) {
- if (ValueAndOffset.second != nullptr)
- continue;
-
- auto It = ValueExprMap.find_as(ValueAndOffset.first);
+ for (Value *V : KV.second) {
+ auto It = ValueExprMap.find_as(V);
if (It == ValueExprMap.end()) {
- dbgs() << "Value " << *ValueAndOffset.first
+ dbgs() << "Value " << *V
<< " is in ExprValueMap but not in ValueExprMap\n";
std::abort();
}
if (It->second != KV.first) {
- dbgs() << "Value " << *ValueAndOffset.first
- << " mapped to " << *It->second
+ dbgs() << "Value " << *V << " mapped to " << *It->second
<< " rather than " << *KV.first << "\n";
std::abort();
}
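With the offset entries gone, the verification above can treat
ValueExprMap and ExprValueMap as mutual inverses. A standalone sketch of
that invariant, using std::string stand-ins for Value* and const SCEV*
(simplified types, not the actual LLVM data structures):

#include <cassert>
#include <map>
#include <set>
#include <string>

int main() {
  // ValueExprMap: IR value -> SCEV. ExprValueMap: SCEV -> set of IR values.
  std::map<std::string, std::string> ValueExpr = {{"%v1", "S1"}, {"%v2", "S1"}};
  std::map<std::string, std::set<std::string>> ExprValue = {
      {"S1", {"%v1", "%v2"}}};

  // Forward direction: every value must appear in the set for its SCEV.
  for (const auto &[V, S] : ValueExpr)
    assert(ExprValue[S].count(V));

  // Reverse direction: every recorded value must map back to exactly that
  // SCEV. There is no longer an offset case to skip over.
  for (const auto &[S, Vs] : ExprValue)
    for (const auto &V : Vs)
      assert(ValueExpr[V] == S);
}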
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 754382aa0845..00c5b5053f9e 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1870,9 +1870,8 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
return V;
}
-ScalarEvolution::ValueOffsetPair
-SCEVExpander::FindValueInExprValueMap(const SCEV *S,
- const Instruction *InsertPt) {
+Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
+ const Instruction *InsertPt) {
auto *Set = SE.getSCEVValues(S);
// If the expansion is not in CanonicalMode, and the SCEV contains any
// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
@@ -1882,9 +1881,7 @@ SCEVExpander::FindValueInExprValueMap(const SCEV *S,
// Choose a Value from the set which dominates the InsertPt.
// InsertPt should be inside the Value's parent loop so as not to break
// the LCSSA form.
- for (auto const &VOPair : *Set) {
- Value *V = VOPair.first;
- ConstantInt *Offset = VOPair.second;
+ for (Value *V : *Set) {
Instruction *EntInst = dyn_cast_or_null<Instruction>(V);
if (!EntInst)
continue;
@@ -1894,11 +1891,11 @@ SCEVExpander::FindValueInExprValueMap(const SCEV *S,
SE.DT.dominates(EntInst, InsertPt) &&
(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
- return {V, Offset};
+ return V;
}
}
}
- return {nullptr, nullptr};
+ return nullptr;
}
// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
@@ -1967,9 +1964,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
Builder.SetInsertPoint(InsertPt);
// Expand the expression into instructions.
- ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt);
- Value *V = VO.first;
-
+ Value *V = FindValueInExprValueMap(S, InsertPt);
if (!V)
V = visit(S);
else {
@@ -1980,21 +1975,6 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (auto *I = dyn_cast<Instruction>(V))
if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))
I->dropPoisonGeneratingFlags();
-
- if (VO.second) {
- if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
- int64_t Offset = VO.second->getSExtValue();
- ConstantInt *Idx =
- ConstantInt::getSigned(VO.second->getType(), -Offset);
- unsigned AS = Vty->getAddressSpace();
- V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
- V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
- "uglygep");
- V = Builder.CreateBitCast(V, Vty);
- } else {
- V = Builder.CreateSub(V, VO.second);
- }
- }
}
// Remember the expanded value for this SCEV at this location.
//
@@ -2176,9 +2156,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
-Optional<ScalarEvolution::ValueOffsetPair>
-SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
- Loop *L) {
+Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S,
+ const Instruction *At,
+ Loop *L) {
using namespace llvm::PatternMatch;
SmallVector<BasicBlock *, 4> ExitingBlocks;
@@ -2195,25 +2175,17 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
continue;
if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
- return ScalarEvolution::ValueOffsetPair(LHS, nullptr);
+ return LHS;
if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
- return ScalarEvolution::ValueOffsetPair(RHS, nullptr);
+ return RHS;
}
// Use expand's logic which is used for reusing a previous Value in
// ExprValueMap. Note that we don't currently model the cost of
// needing to drop poison generating flags on the instruction if we
// want to reuse it. We effectively assume that has zero cost.
- ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At);
- if (VO.first)
- return VO;
-
- // There is potential to make this significantly smarter, but this simple
- // heuristic already gets some interesting cases.
-
- // Can not find suitable value.
- return None;
+ return FindValueInExprValueMap(S, At);
}
template<typename T> static InstructionCost costAndCollectOperands(
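The knock-on change for callers is mechanical: instead of unpacking an
Optional<ValueOffsetPair>, they null-check a plain Value pointer. A
standalone before/after sketch; the function and type names here are
hypothetical stand-ins, not the real LLVM classes:

#include <cstdio>
#include <optional>
#include <utility>

struct Value { const char *Name; };

// Old shape: Optional<ValueOffsetPair>, i.e. a (value, offset) pair.
std::optional<std::pair<Value *, int>> findExpansionOld(Value *V) {
  return std::make_pair(V, 0); // An offset of 0 stood for an exact match.
}

// New shape: a nullable pointer suffices once offsets are not tracked.
Value *findExpansionNew(Value *V) { return V; }

int main() {
  Value V{"%v1"};
  if (auto VO = findExpansionOld(&V)) // old: unpack the pair, mind the offset
    std::printf("old: reuse %s, offset %d\n", VO->first->Name, VO->second);
  if (Value *Existing = findExpansionNew(&V)) // new: a plain null-check
    std::printf("new: reuse %s\n", Existing->Name);
}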
diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
index 75439f811860..443d89aaeaed 100644
--- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
+++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
@@ -115,55 +115,57 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
-; CHECK-NEXT: movl (%rax), %ecx
-; CHECK-NEXT: andl $511, %ecx # imm = 0x1FF
-; CHECK-NEXT: leaq 1(%rcx), %r13
-; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rax
-; CHECK-NEXT: movl %r13d, (%rax)
-; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rax
; CHECK-NEXT: movl (%rax), %edx
-; CHECK-NEXT: testl %edx, %edx
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: andl $511, %eax # imm = 0x1FF
+; CHECK-NEXT: leaq 1(%rax), %rsi
+; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
+; CHECK-NEXT: movl %esi, (%rcx)
+; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
+; CHECK-NEXT: movl (%rcx), %ecx
+; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB1_18
; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
-; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rax
-; CHECK-NEXT: movq (%rax), %r12
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: notl %eax
-; CHECK-NEXT: leaq 8(,%rax,8), %r14
-; CHECK-NEXT: imulq %r13, %r14
+; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rdi
+; CHECK-NEXT: movq (%rdi), %r12
+; CHECK-NEXT: movl %ecx, %edi
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: leaq 8(,%rdi,8), %r14
+; CHECK-NEXT: imulq %rsi, %r14
; CHECK-NEXT: addq %r12, %r14
; CHECK-NEXT: movq x2@GOTPCREL(%rip), %r15
-; CHECK-NEXT: movl (%r15), %eax
-; CHECK-NEXT: leal 8(,%rcx,8), %ecx
-; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: leaq 8(%r12), %rcx
-; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: leaq 32(%r12), %rbx
-; CHECK-NEXT: shlq $3, %r13
-; CHECK-NEXT: xorl %esi, %esi
-; CHECK-NEXT: movq x0@GOTPCREL(%rip), %rcx
-; CHECK-NEXT: movq %r12, %rdi
+; CHECK-NEXT: movl (%r15), %ebx
+; CHECK-NEXT: leal 8(,%rax,8), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: leaq 8(%r12), %rax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: leaq 32(%r12), %rax
+; CHECK-NEXT: andl $511, %edx # imm = 0x1FF
+; CHECK-NEXT: leaq 8(,%rdx,8), %r13
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: movq x0@GOTPCREL(%rip), %rdx
+; CHECK-NEXT: movq %r12, %rsi
; CHECK-NEXT: jmp .LBB1_2
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_15: # %for.cond1.for.inc3_crit_edge
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movl %eax, (%r15)
+; CHECK-NEXT: movl %ebx, (%r15)
; CHECK-NEXT: .LBB1_16: # %for.inc3
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: addq %r13, %rdi
-; CHECK-NEXT: incq %rsi
-; CHECK-NEXT: addq %r13, %rbx
-; CHECK-NEXT: incl %edx
+; CHECK-NEXT: addq %r13, %rsi
+; CHECK-NEXT: incq %rdi
+; CHECK-NEXT: addq %r13, %rax
+; CHECK-NEXT: incl %ecx
; CHECK-NEXT: je .LBB1_17
; CHECK-NEXT: .LBB1_2: # %for.cond1thread-pre-split
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB1_12 Depth 2
; CHECK-NEXT: # Child Loop BB1_14 Depth 2
-; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: testl %ebx, %ebx
; CHECK-NEXT: jns .LBB1_16
; CHECK-NEXT: # %bb.3: # %for.body2.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movslq %eax, %r9
+; CHECK-NEXT: movslq %ebx, %r9
; CHECK-NEXT: testq %r9, %r9
; CHECK-NEXT: movq $-1, %rbp
; CHECK-NEXT: cmovnsq %r9, %rbp
@@ -178,76 +180,76 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: je .LBB1_14
; CHECK-NEXT: # %bb.5: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
-; CHECK-NEXT: imulq %rsi, %r10
-; CHECK-NEXT: leaq (%r12,%r10), %rax
-; CHECK-NEXT: leaq (%rax,%r9,8), %rax
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; CHECK-NEXT: imulq %rdi, %r11
+; CHECK-NEXT: leaq (%r12,%r11), %rbx
+; CHECK-NEXT: leaq (%rbx,%r9,8), %rbx
; CHECK-NEXT: testq %r9, %r9
-; CHECK-NEXT: movq $-1, %r11
-; CHECK-NEXT: cmovnsq %r9, %r11
-; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq $-1, %r10
+; CHECK-NEXT: cmovnsq %r9, %r10
+; CHECK-NEXT: cmpq %rdx, %rbx
; CHECK-NEXT: jae .LBB1_7
; CHECK-NEXT: # %bb.6: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
-; CHECK-NEXT: leaq (%r10,%r11,8), %rax
-; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; CHECK-NEXT: leaq (%r11,%r10,8), %rbx
+; CHECK-NEXT: cmpq %rdx, %rbx
; CHECK-NEXT: ja .LBB1_14
; CHECK-NEXT: .LBB1_7: # %vector.body.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: leaq -4(%r8), %rax
-; CHECK-NEXT: movq %rax, %r10
-; CHECK-NEXT: shrq $2, %r10
-; CHECK-NEXT: btl $2, %eax
+; CHECK-NEXT: leaq -4(%r8), %rbx
+; CHECK-NEXT: movq %rbx, %r11
+; CHECK-NEXT: shrq $2, %r11
+; CHECK-NEXT: btl $2, %ebx
; CHECK-NEXT: jb .LBB1_8
; CHECK-NEXT: # %bb.9: # %vector.body.prol.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; CHECK-NEXT: movdqu %xmm0, (%rdi,%r9,8)
-; CHECK-NEXT: movdqu %xmm0, 16(%rdi,%r9,8)
-; CHECK-NEXT: movl $4, %r11d
-; CHECK-NEXT: testq %r10, %r10
+; CHECK-NEXT: movdqu %xmm0, (%rsi,%r9,8)
+; CHECK-NEXT: movdqu %xmm0, 16(%rsi,%r9,8)
+; CHECK-NEXT: movl $4, %r10d
+; CHECK-NEXT: testq %r11, %r11
; CHECK-NEXT: jne .LBB1_11
; CHECK-NEXT: jmp .LBB1_13
; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: xorl %r11d, %r11d
-; CHECK-NEXT: testq %r10, %r10
+; CHECK-NEXT: xorl %r10d, %r10d
+; CHECK-NEXT: testq %r11, %r11
; CHECK-NEXT: je .LBB1_13
; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; CHECK-NEXT: movq %r11, %rax
-; CHECK-NEXT: subq %r8, %rax
-; CHECK-NEXT: addq %r9, %r11
-; CHECK-NEXT: leaq (%rbx,%r11,8), %r11
+; CHECK-NEXT: movq %r10, %rbx
+; CHECK-NEXT: subq %r8, %rbx
+; CHECK-NEXT: addq %r9, %r10
+; CHECK-NEXT: leaq (%rax,%r10,8), %r10
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_12: # %vector.body
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: movdqu %xmm0, -32(%r11)
-; CHECK-NEXT: movdqu %xmm0, -16(%r11)
-; CHECK-NEXT: movdqu %xmm0, (%r11)
-; CHECK-NEXT: movdqu %xmm0, 16(%r11)
-; CHECK-NEXT: addq $64, %r11
-; CHECK-NEXT: addq $8, %rax
+; CHECK-NEXT: movdqu %xmm0, -32(%r10)
+; CHECK-NEXT: movdqu %xmm0, -16(%r10)
+; CHECK-NEXT: movdqu %xmm0, (%r10)
+; CHECK-NEXT: movdqu %xmm0, 16(%r10)
+; CHECK-NEXT: addq $64, %r10
+; CHECK-NEXT: addq $8, %rbx
; CHECK-NEXT: jne .LBB1_12
; CHECK-NEXT: .LBB1_13: # %middle.block
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: addq %r8, %r9
; CHECK-NEXT: cmpq %r8, %rbp
-; CHECK-NEXT: movq %r9, %rax
+; CHECK-NEXT: movq %r9, %rbx
; CHECK-NEXT: je .LBB1_15
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_14: # %for.body2
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: movq (%rcx), %rax
-; CHECK-NEXT: movq %rax, (%rdi,%r9,8)
-; CHECK-NEXT: leaq 1(%r9), %rax
+; CHECK-NEXT: movq (%rdx), %rbp
+; CHECK-NEXT: movq %rbp, (%rsi,%r9,8)
+; CHECK-NEXT: leaq 1(%r9), %rbx
; CHECK-NEXT: cmpq $-1, %r9
-; CHECK-NEXT: movq %rax, %r9
+; CHECK-NEXT: movq %rbx, %r9
; CHECK-NEXT: jl .LBB1_14
; CHECK-NEXT: jmp .LBB1_15
; CHECK-NEXT: .LBB1_17: # %for.cond.for.end5_crit_edge
diff --git a/llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll b/llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll
index 69e96d53f220..d60f0c6cd2a1 100644
--- a/llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll
+++ b/llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll
@@ -239,7 +239,7 @@ define dso_local void @NegativeFor64(i32* %ar, i64 %n, i64 %m) #0 {
; CHECK-NEXT: [[MUL3:%.*]] = mul nsw i64 [[M:%.*]], 4
; CHECK-NEXT: [[CONV4:%.*]] = trunc i64 [[MUL3]] to i32
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[M]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[CONV]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 2
; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 4 [[AR1]], i8 0, i32 [[TMP3]], i1 false)
diff --git a/llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll b/llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll
index 5d485b82ada7..ed74a17fa7b8 100644
--- a/llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll
+++ b/llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll
@@ -235,7 +235,7 @@ define void @Negative32(i32* %ar, i32 %n, i32 %m) {
; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[M:%.*]] to i64
; CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[M]] to i64
; CHECK-NEXT: [[MUL3:%.*]] = mul i64 [[CONV2]], 4
-; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[CONV]], -1
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[CONV1]], [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 2
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[AR1]], i8 0, i64 [[TMP2]], i1 false)