[llvm] e9cced2 - Recommit "[LAA] Initial support for runtime checks with pointer selects."
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 17 12:07:05 PDT 2022
Author: Florian Hahn
Date: 2022-06-17T21:06:26+02:00
New Revision: e9cced27390ba38eac1144aa1240281a1edadec0
URL: https://github.com/llvm/llvm-project/commit/e9cced27390ba38eac1144aa1240281a1edadec0
DIFF: https://github.com/llvm/llvm-project/commit/e9cced27390ba38eac1144aa1240281a1edadec0.diff
LOG: Recommit "[LAA] Initial support for runtime checks with pointer selects."
This reverts commit 7aa8a678826dea86ff3e6c7df9d2a8a6ef868f5d.
This version includes fixes to address issues uncovered after
the commit landed and discussed at D11448.
Those include:
* Limit select-traversal to selects inside the loop.
* Freeze pointers resulting from looking through selects to avoid
branch-on-poison.
Added:
Modified:
llvm/include/llvm/Analysis/LoopAccessAnalysis.h
llvm/lib/Analysis/LoopAccessAnalysis.cpp
llvm/lib/Transforms/Utils/LoopUtils.cpp
llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 713d68fb352aa..8f71ce9e96c05 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -343,7 +343,7 @@ struct RuntimeCheckingPtrGroup {
/// of success, false otherwise.
bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck);
bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End,
- unsigned AS, ScalarEvolution &SE);
+ unsigned AS, bool NeedsFreeze, ScalarEvolution &SE);
/// The SCEV expression which represents the upper bound of all the
/// pointers in this group.
@@ -355,6 +355,9 @@ struct RuntimeCheckingPtrGroup {
SmallVector<unsigned, 2> Members;
/// Address space of the involved pointers.
unsigned AddressSpace;
+ /// Whether the pointer needs to be frozen after expansion, e.g. because it
+ /// may be poison outside the loop.
+ bool NeedsFreeze = false;
};
/// A memcheck which made up of a pair of grouped pointers.
@@ -366,10 +369,12 @@ struct PointerDiffInfo {
const SCEV *SrcStart;
const SCEV *SinkStart;
unsigned AccessSize;
+ bool NeedsFreeze;
PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart,
- unsigned AccessSize)
- : SrcStart(SrcStart), SinkStart(SinkStart), AccessSize(AccessSize) {}
+ unsigned AccessSize, bool NeedsFreeze)
+ : SrcStart(SrcStart), SinkStart(SinkStart), AccessSize(AccessSize),
+ NeedsFreeze(NeedsFreeze) {}
};
/// Holds information about the memory runtime legality checks to verify
@@ -396,13 +401,15 @@ class RuntimePointerChecking {
unsigned AliasSetId;
/// SCEV for the access.
const SCEV *Expr;
+ /// True if the pointer expression needs to be frozen after expansion.
+ bool NeedsFreeze;
PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End,
bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
- const SCEV *Expr)
+ const SCEV *Expr, bool NeedsFreeze)
: PointerValue(PointerValue), Start(Start), End(End),
IsWritePtr(IsWritePtr), DependencySetId(DependencySetId),
- AliasSetId(AliasSetId), Expr(Expr) {}
+ AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {}
};
RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
@@ -420,9 +427,9 @@ class RuntimePointerChecking {
/// according to the assumptions that we've made during the analysis.
/// The method might also version the pointer stride according to \p Strides,
/// and add new predicates to \p PSE.
- void insert(Loop *Lp, Value *Ptr, Type *AccessTy, bool WritePtr,
- unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides,
- PredicatedScalarEvolution &PSE);
+ void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy,
+ bool WritePtr, unsigned DepSetId, unsigned ASId,
+ PredicatedScalarEvolution &PSE, bool NeedsFreeze);
/// No run-time memory checking is necessary.
bool empty() const { return Pointers.empty(); }
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 4dbdb5cde6398..79161db9b5e45 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -47,6 +47,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
@@ -65,6 +66,7 @@
#include <vector>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-accesses"
@@ -171,7 +173,8 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
: High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
AddressSpace(RtCheck.Pointers[Index]
.PointerValue->getType()
- ->getPointerAddressSpace()) {
+ ->getPointerAddressSpace()),
+ NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
Members.push_back(Index);
}
@@ -188,22 +191,20 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
-void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, Type *AccessTy,
- bool WritePtr, unsigned DepSetId,
- unsigned ASId,
- const ValueToValueMap &Strides,
- PredicatedScalarEvolution &PSE) {
- // Get the stride replaced scev.
- const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
+ Type *AccessTy, bool WritePtr,
+ unsigned DepSetId, unsigned ASId,
+ PredicatedScalarEvolution &PSE,
+ bool NeedsFreeze) {
ScalarEvolution *SE = PSE.getSE();
const SCEV *ScStart;
const SCEV *ScEnd;
- if (SE->isLoopInvariant(Sc, Lp)) {
- ScStart = ScEnd = Sc;
+ if (SE->isLoopInvariant(PtrExpr, Lp)) {
+ ScStart = ScEnd = PtrExpr;
} else {
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
assert(AR && "Invalid addrec expression");
const SCEV *Ex = PSE.getBackedgeTakenCount();
@@ -230,7 +231,8 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, Type *AccessTy,
const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
- Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
+ Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
+ NeedsFreeze);
}
void RuntimePointerChecking::tryToCreateDiffCheck(
@@ -315,7 +317,8 @@ void RuntimePointerChecking::tryToCreateDiffCheck(
CanUseDiffCheck = false;
return;
}
- DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize);
+ DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
+ Src->NeedsFreeze || Sink->NeedsFreeze);
}
SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
@@ -370,11 +373,12 @@ bool RuntimeCheckingPtrGroup::addPointer(unsigned Index,
return addPointer(
Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
- *RtCheck.SE);
+ RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
}
bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
const SCEV *End, unsigned AS,
+ bool NeedsFreeze,
ScalarEvolution &SE) {
assert(AddressSpace == AS &&
"all pointers in a checking group must be in the same address space");
@@ -399,6 +403,7 @@ bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
High = End;
Members.push_back(Index);
+ this->NeedsFreeze |= NeedsFreeze;
return true;
}
@@ -456,9 +461,11 @@ void RuntimePointerChecking::groupChecks(
unsigned TotalComparisons = 0;
- DenseMap<Value *, unsigned> PositionMap;
- for (unsigned Index = 0; Index < Pointers.size(); ++Index)
- PositionMap[Pointers[Index].PointerValue] = Index;
+ DenseMap<Value *, SmallVector<unsigned>> PositionMap;
+ for (unsigned Index = 0; Index < Pointers.size(); ++Index) {
+ auto Iter = PositionMap.insert({Pointers[Index].PointerValue, {}});
+ Iter.first->second.push_back(Index);
+ }
// We need to keep track of what pointers we've already seen so we
// don't process them twice.
@@ -489,34 +496,35 @@ void RuntimePointerChecking::groupChecks(
auto PointerI = PositionMap.find(MI->getPointer());
assert(PointerI != PositionMap.end() &&
"pointer in equivalence class not found in PositionMap");
- unsigned Pointer = PointerI->second;
- bool Merged = false;
- // Mark this pointer as seen.
- Seen.insert(Pointer);
-
- // Go through all the existing sets and see if we can find one
- // which can include this pointer.
- for (RuntimeCheckingPtrGroup &Group : Groups) {
- // Don't perform more than a certain amount of comparisons.
- // This should limit the cost of grouping the pointers to something
- // reasonable. If we do end up hitting this threshold, the algorithm
- // will create separate groups for all remaining pointers.
- if (TotalComparisons > MemoryCheckMergeThreshold)
- break;
-
- TotalComparisons++;
-
- if (Group.addPointer(Pointer, *this)) {
- Merged = true;
- break;
+ for (unsigned Pointer : PointerI->second) {
+ bool Merged = false;
+ // Mark this pointer as seen.
+ Seen.insert(Pointer);
+
+ // Go through all the existing sets and see if we can find one
+ // which can include this pointer.
+ for (RuntimeCheckingPtrGroup &Group : Groups) {
+ // Don't perform more than a certain amount of comparisons.
+ // This should limit the cost of grouping the pointers to something
+ // reasonable. If we do end up hitting this threshold, the algorithm
+ // will create separate groups for all remaining pointers.
+ if (TotalComparisons > MemoryCheckMergeThreshold)
+ break;
+
+ TotalComparisons++;
+
+ if (Group.addPointer(Pointer, *this)) {
+ Merged = true;
+ break;
+ }
}
- }
- if (!Merged)
- // We couldn't add this pointer to any existing set or the threshold
- // for the number of comparisons has been reached. Create a new group
- // to hold the current pointer.
- Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
+ if (!Merged)
+ // We couldn't add this pointer to any existing set or the threshold
+ // for the number of comparisons has been reached. Create a new group
+ // to hold the current pointer.
+ Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
+ }
}
// We've computed the grouped checks for this partition.
@@ -715,11 +723,8 @@ class AccessAnalysis {
/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
-static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
- const ValueToValueMap &Strides, Value *Ptr,
- Loop *L, bool Assume) {
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
-
+static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
+ const SCEV *PtrScev, Loop *L, bool Assume) {
// The bounds for loop-invariant pointer is trivial.
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
@@ -782,34 +787,67 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
bool Assume) {
Value *Ptr = Access.getPointer();
- if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
- return false;
+ ScalarEvolution &SE = *PSE.getSE();
+ SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs;
+ auto *SI = dyn_cast<SelectInst>(Ptr);
+ // Look through selects in the current loop.
+ if (SI && !TheLoop->isLoopInvariant(SI)) {
+ TranslatedPtrs = {
+ std::make_pair(SE.getSCEV(SI->getOperand(1)),
+ !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(1))),
+ std::make_pair(SE.getSCEV(SI->getOperand(2)),
+ !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(2)))};
+ } else
+ TranslatedPtrs = {
+ std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)};
- // When we run after a failing dependency check we have to make sure
- // we don't have wrapping pointers.
- if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
- auto *Expr = PSE.getSCEV(Ptr);
- if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+ for (auto &P : TranslatedPtrs) {
+ const SCEV *PtrExpr = P.first;
+ if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
return false;
- PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+
+ // When we run after a failing dependency check we have to make sure
+ // we don't have wrapping pointers.
+ if (ShouldCheckWrap) {
+ // Skip wrap checking when translating pointers.
+ if (TranslatedPtrs.size() > 1)
+ return false;
+
+ if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
+ auto *Expr = PSE.getSCEV(Ptr);
+ if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+ return false;
+ PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+ }
+ }
+ // If there's only one option for Ptr, look it up after bounds and wrap
+ // checking, because assumptions might have been added to PSE.
+ if (TranslatedPtrs.size() == 1)
+ TranslatedPtrs[0] = std::make_pair(
+ replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false);
}
- // The id of the dependence set.
- unsigned DepId;
+ for (auto &P : TranslatedPtrs) {
+ const SCEV *PtrExpr = P.first;
- if (isDependencyCheckNeeded()) {
- Value *Leader = DepCands.getLeaderValue(Access).getPointer();
- unsigned &LeaderId = DepSetId[Leader];
- if (!LeaderId)
- LeaderId = RunningDepId++;
- DepId = LeaderId;
- } else
- // Each access has its own dependence set.
- DepId = RunningDepId++;
+ // The id of the dependence set.
+ unsigned DepId;
- bool IsWrite = Access.getInt();
- RtCheck.insert(TheLoop, Ptr, AccessTy, IsWrite, DepId, ASId, StridesMap, PSE);
- LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+ if (isDependencyCheckNeeded()) {
+ Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+ unsigned &LeaderId = DepSetId[Leader];
+ if (!LeaderId)
+ LeaderId = RunningDepId++;
+ DepId = LeaderId;
+ } else
+ // Each access has its own dependence set.
+ DepId = RunningDepId++;
+
+ bool IsWrite = Access.getInt();
+ RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
+ P.second);
+ LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+ }
return true;
}
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 965b35a50978b..1310abbc126ab 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1525,6 +1525,11 @@ static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ if (CG->NeedsFreeze) {
+ IRBuilder<> Builder(Loc);
+ Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
+ End = Builder.CreateFreeze(End, End->getName() + ".fr");
+ }
LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
return {Start, End};
}
@@ -1623,6 +1628,11 @@ Value *llvm::addDiffRuntimeChecks(
ConstantInt::get(Ty, IC * C.AccessSize));
Value *Sink = Expander.expandCodeFor(C.SinkStart, Ty, Loc);
Value *Src = Expander.expandCodeFor(C.SrcStart, Ty, Loc);
+ if (C.NeedsFreeze) {
+ IRBuilder<> Builder(Loc);
+ Sink = Builder.CreateFreeze(Sink, Sink->getName() + ".fr");
+ Src = Builder.CreateFreeze(Src, Src->getName() + ".fr");
+ }
Value *Diff = ChkBuilder.CreateSub(Sink, Src);
Value *IsConflict =
ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
index f0dbc35588896..b63c2f0cc556d 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
@@ -4,10 +4,32 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; CHECK-LABEL: function 'forked_ptrs_simple':
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: cannot identify array bounds
+; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[G1:.+]]):
+; CHECK-NEXT: %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
+; CHECK-NEXT: %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
+; CHECK-NEXT: Against group ([[G2:.+]]):
+; CHECK-NEXT: %select = select i1 %cmp, float* %gep.1, float* %gep.2
+; CHECK-NEXT: Check 1:
+; CHECK-NEXT: Comparing group ([[G1]]):
+; CHECK-NEXT: %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
+; CHECK-NEXT: %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
+; CHECK-NEXT: Against group ([[G3:.+]]):
+; CHECK-NEXT: %select = select i1 %cmp, float* %gep.1, float* %gep.2
; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[G1]]
+; CHECK-NEXT: (Low: %Dest High: (400 + %Dest))
+; CHECK-NEXT: Member: {%Dest,+,4}<nuw><%loop>
+; CHECK-NEXT: Member: {%Dest,+,4}<nuw><%loop>
+; CHECK-NEXT: Group [[G2]]:
+; CHECK-NEXT: (Low: %Base1 High: (400 + %Base1))
+; CHECK-NEXT: Member: {%Base1,+,4}<nw><%loop>
+; CHECK-NEXT: Group [[G3]]:
+; CHECK-NEXT: (Low: %Base2 High: (400 + %Base2))
+; CHECK-NEXT: Member: {%Base2,+,4}<nw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll b/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll
index 4be87da0eaaef..100b5f10a3855 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll
@@ -9,7 +9,7 @@ define void @test1_select_invariant(ptr %src.1, ptr %src.2, ptr %dst, i1 %c, i8
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[N]], -1
; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
@@ -19,7 +19,42 @@ define void @test1_select_invariant(ptr %src.1, ptr %src.2, ptr %dst, i1 %c, i8
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR_SEL]], [[UGLYGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %scalar.ph, label %vector.ph
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
+; CHECK-NEXT: [[INDUCTION:%.*]] = add i8 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[INDUCTION2:%.*]] = add i8 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[PTR_SEL]], align 8, !alias.scope !0
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[PTR_SEL]], align 8, !alias.scope !0
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i8 [[INDUCTION]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i8 [[INDUCTION2]]
+; CHECK-NEXT: store i8 [[TMP6]], ptr [[TMP8]], align 2, !alias.scope !3, !noalias !0
+; CHECK-NEXT: store i8 [[TMP7]], ptr [[TMP9]], align 2, !alias.scope !3, !noalias !0
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[PTR_SEL]], align 8
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i8 [[IV]]
+; CHECK-NEXT: store i8 [[L_1]], ptr [[GEP_DST]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
;
entry:
%ptr.sel = select i1 %c, ptr %src.1, ptr %src.2
@@ -41,8 +76,69 @@ exit:
define void @test_loop_dependent_select1(ptr %src.1, ptr %src.2, ptr %dst, i1 %c, i8 %n) {
; CHECK-LABEL: @test_loop_dependent_select1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label %loop
-; CHECK-NOT: vector.body:
+; CHECK-NEXT: [[SRC_23:%.*]] = ptrtoint ptr [[SRC_2:%.*]] to i64
+; CHECK-NEXT: [[SRC_12:%.*]] = ptrtoint ptr [[SRC_1:%.*]] to i64
+; CHECK-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[DST1_FR:%.*]] = freeze i64 [[DST1]]
+; CHECK-NEXT: [[SRC_12_FR:%.*]] = freeze i64 [[SRC_12]]
+; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[DST1_FR]], [[SRC_12_FR]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
+; CHECK-NEXT: [[DST1_FR4:%.*]] = freeze i64 [[DST1]]
+; CHECK-NEXT: [[SRC_23_FR:%.*]] = freeze i64 [[SRC_23]]
+; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[DST1_FR4]], [[SRC_23_FR]]
+; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP4]], 2
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK5]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
+; CHECK-NEXT: [[INDUCTION:%.*]] = add i8 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[INDUCTION6:%.*]] = add i8 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC_1]], i8 [[INDUCTION]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC_1]], i8 [[INDUCTION6]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC_2]], i8 [[INDUCTION]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC_2]], i8 [[INDUCTION6]]
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[C:%.*]], ptr [[TMP5]], ptr [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[C]], ptr [[TMP6]], ptr [[TMP8]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP10]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i8 [[INDUCTION]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST]], i8 [[INDUCTION6]]
+; CHECK-NEXT: store i8 [[TMP11]], ptr [[TMP13]], align 2
+; CHECK-NEXT: store i8 [[TMP12]], ptr [[TMP14]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i8, ptr [[SRC_1]], i8 [[IV]]
+; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i8, ptr [[SRC_2]], i8 [[IV]]
+; CHECK-NEXT: [[PTR_SEL:%.*]] = select i1 [[C]], ptr [[GEP_SRC_1]], ptr [[GEP_SRC_2]]
+; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[PTR_SEL]], align 8
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i8 [[IV]]
+; CHECK-NEXT: store i8 [[L_1]], ptr [[GEP_DST]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
;
entry:
br label %loop
@@ -67,8 +163,71 @@ exit:
define void @test_loop_dependent_select2(ptr %src.1, ptr %src.2, ptr %dst, i8 %n, i8 %x) {
; CHECK-LABEL: @test_loop_dependent_select2(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label %loop
-; CHECK-NOT: vector.body:
+; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[N]], -1
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
+; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[SRC_1:%.*]], i64 1
+; CHECK-NEXT: [[SRC_1_FR:%.*]] = freeze ptr [[SRC_1]]
+; CHECK-NEXT: [[UGLYGEP1_FR:%.*]] = freeze ptr [[UGLYGEP1]]
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 1
+; CHECK-NEXT: [[SRC_2_FR:%.*]] = freeze ptr [[SRC_2]]
+; CHECK-NEXT: [[UGLYGEP2_FR:%.*]] = freeze ptr [[UGLYGEP2]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1_FR]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC_1_FR]], [[UGLYGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP2_FR]]
+; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC_2_FR]], [[UGLYGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
+; CHECK-NEXT: [[INDUCTION:%.*]] = add i8 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[INDUCTION6:%.*]] = add i8 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i8 [[INDUCTION]], [[X:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i8 [[INDUCTION6]], [[X]]
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], ptr [[SRC_1]], ptr [[SRC_2]]
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], ptr [[SRC_1]], ptr [[SRC_2]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 8, !alias.scope !10
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 8, !alias.scope !10
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i8 [[INDUCTION]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i8 [[INDUCTION6]]
+; CHECK-NEXT: store i8 [[TMP10]], ptr [[TMP12]], align 2, !alias.scope !13, !noalias !15
+; CHECK-NEXT: store i8 [[TMP11]], ptr [[TMP13]], align 2, !alias.scope !13, !noalias !15
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[IV]], [[X]]
+; CHECK-NEXT: [[PTR_SEL:%.*]] = select i1 [[C]], ptr [[SRC_1]], ptr [[SRC_2]]
+; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[PTR_SEL]], align 8
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i8 [[IV]]
+; CHECK-NEXT: store i8 [[L_1]], ptr [[GEP_DST]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
;
entry:
br label %loop
@@ -91,8 +250,69 @@ exit:
define void @test_loop_dependent_select_first_ptr_noundef(ptr noundef %src.1, ptr %src.2, ptr %dst, i8 %n, i8 %x) {
; CHECK-LABEL: @test_loop_dependent_select_first_ptr_noundef(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label %loop
-; CHECK-NOT: vector.body:
+; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[N]], -1
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
+; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[SRC_1:%.*]], i64 1
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 1
+; CHECK-NEXT: [[SRC_2_FR:%.*]] = freeze ptr [[SRC_2]]
+; CHECK-NEXT: [[UGLYGEP2_FR:%.*]] = freeze ptr [[UGLYGEP2]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC_1]], [[UGLYGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP2_FR]]
+; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC_2_FR]], [[UGLYGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
+; CHECK-NEXT: [[INDUCTION:%.*]] = add i8 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[INDUCTION6:%.*]] = add i8 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i8 [[INDUCTION]], [[X:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i8 [[INDUCTION6]], [[X]]
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], ptr [[SRC_1]], ptr [[SRC_2]]
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], ptr [[SRC_1]], ptr [[SRC_2]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 8, !alias.scope !19
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 8, !alias.scope !19
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i8 [[INDUCTION]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i8 [[INDUCTION6]]
+; CHECK-NEXT: store i8 [[TMP10]], ptr [[TMP12]], align 2, !alias.scope !22, !noalias !24
+; CHECK-NEXT: store i8 [[TMP11]], ptr [[TMP13]], align 2, !alias.scope !22, !noalias !24
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[IV]], [[X]]
+; CHECK-NEXT: [[PTR_SEL:%.*]] = select i1 [[C]], ptr [[SRC_1]], ptr [[SRC_2]]
+; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[PTR_SEL]], align 8
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i8 [[IV]]
+; CHECK-NEXT: store i8 [[L_1]], ptr [[GEP_DST]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
;
entry:
br label %loop
@@ -115,8 +335,69 @@ exit:
define void @test_loop_dependent_select_second_ptr_noundef(ptr %src.1, ptr noundef %src.2, ptr %dst, i8 %n, i8 %x) {
; CHECK-LABEL: @test_loop_dependent_select_second_ptr_noundef(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label %loop
-; CHECK-NOT: vector.body:
+; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[N]], -1
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
+; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[SRC_1:%.*]], i64 1
+; CHECK-NEXT: [[SRC_1_FR:%.*]] = freeze ptr [[SRC_1]]
+; CHECK-NEXT: [[UGLYGEP1_FR:%.*]] = freeze ptr [[UGLYGEP1]]
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 1
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1_FR]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC_1_FR]], [[UGLYGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP2]]
+; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC_2]], [[UGLYGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
+; CHECK-NEXT: [[INDUCTION:%.*]] = add i8 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[INDUCTION6:%.*]] = add i8 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i8 [[INDUCTION]], [[X:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i8 [[INDUCTION6]], [[X]]
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], ptr [[SRC_1]], ptr [[SRC_2]]
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], ptr [[SRC_1]], ptr [[SRC_2]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 8, !alias.scope !28
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 8, !alias.scope !28
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i8 [[INDUCTION]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i8 [[INDUCTION6]]
+; CHECK-NEXT: store i8 [[TMP10]], ptr [[TMP12]], align 2, !alias.scope !31, !noalias !33
+; CHECK-NEXT: store i8 [[TMP11]], ptr [[TMP13]], align 2, !alias.scope !31, !noalias !33
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[IV]], [[X]]
+; CHECK-NEXT: [[PTR_SEL:%.*]] = select i1 [[C]], ptr [[SRC_1]], ptr [[SRC_2]]
+; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[PTR_SEL]], align 8
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i8 [[IV]]
+; CHECK-NEXT: store i8 [[L_1]], ptr [[GEP_DST]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP36:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
;
entry:
br label %loop
More information about the llvm-commits mailing list