[llvm] 3c810b7 - [LV] Add initial legality checks for early exit loops with side effects (#145663)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 05:54:56 PDT 2025
Author: Graham Hunter
Date: 2025-09-10T13:54:52+01:00
New Revision: 3c810b76b97456e4e1c115dcf3238a799067c466
URL: https://github.com/llvm/llvm-project/commit/3c810b76b97456e4e1c115dcf3238a799067c466
DIFF: https://github.com/llvm/llvm-project/commit/3c810b76b97456e4e1c115dcf3238a799067c466.diff
LOG: [LV] Add initial legality checks for early exit loops with side effects (#145663)
This adds initial support to LoopVectorizationLegality to analyze loops
with side effects (particularly stores to memory) and an uncountable
exit. This patch alone doesn't enable any new transformations, but
does give clearer reasons for rejecting vectorization for such a loop.
The intent is for a loop like the following to pass the specific checks,
and only be rejected at the end until the transformation code is
committed:
```
// Assume a is marked restrict
// Assume b is known to be large enough to access up to b[N-1]
for (int i = 0; i < N; ++i) {
a[i]++;
if (b[i] > threshold)
break;
}
```
Added:
Modified:
llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/control-flow.ll
llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 60f92735055bc..405d4a742f37b 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -251,15 +251,18 @@ struct HistogramInfo {
/// induction variable and the different reduction variables.
class LoopVectorizationLegality {
public:
- LoopVectorizationLegality(
- Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
- TargetTransformInfo *TTI, TargetLibraryInfo *TLI, Function *F,
- LoopAccessInfoManager &LAIs, LoopInfo *LI, OptimizationRemarkEmitter *ORE,
- LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
- AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
+ LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE,
+ DominatorTree *DT, TargetTransformInfo *TTI,
+ TargetLibraryInfo *TLI, Function *F,
+ LoopAccessInfoManager &LAIs, LoopInfo *LI,
+ OptimizationRemarkEmitter *ORE,
+ LoopVectorizationRequirements *R,
+ LoopVectorizeHints *H, DemandedBits *DB,
+ AssumptionCache *AC, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, AAResults *AA)
: TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs),
- ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI),
- PSI(PSI) {}
+ ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), PSI(PSI),
+ AA(AA) {}
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
@@ -407,6 +410,14 @@ class LoopVectorizationLegality {
return UncountableExitingBB;
}
+ /// Returns true if this is an early exit loop with state-changing or
+ /// potentially-faulting operations and the condition for the uncountable
+ /// exit must be determined before any of the state changes or potentially
+ /// faulting operations take place.
+ bool hasUncountableExitWithSideEffects() const {
+ return UncountableExitWithSideEffects;
+ }
+
/// Return true if there is store-load forwarding dependencies.
bool isSafeForAnyStoreLoadForwardDistances() const {
return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
@@ -524,20 +535,87 @@ class LoopVectorizationLegality {
/// Returns true if this is an early exit loop that can be vectorized.
/// Currently, a loop with an uncountable early exit is considered
/// vectorizable if:
- /// 1. There are no writes to memory in the loop.
+ /// 1. Writes to memory will access different underlying objects than
+ /// any load used as part of the uncountable exit condition.
/// 2. The loop has only one early uncountable exit
/// 3. The early exit block dominates the latch block.
/// 4. The latch block has an exact exit count.
/// 5. The loop does not contain reductions or recurrences.
/// 6. We can prove at compile-time that loops will not contain faulting
- /// loads.
+ /// loads, or that any faulting loads would also occur in a purely
+ /// scalar loop.
/// 7. It is safe to speculatively execute instructions such as divide or
- /// call instructions.
+ /// call instructions.
/// The list above is not based on theoretical limitations of vectorization,
/// but simply a statement that more work is needed to support these
/// additional cases safely.
bool isVectorizableEarlyExitLoop();
+ /// When vectorizing an early exit loop containing side effects, we need to
+ /// determine whether an uncounted exit will be taken before any operation
+ /// that has side effects.
+ ///
+ /// Consider a loop like the following:
+ /// for (int i = 0; i < N; ++i) {
+ /// a[i] = b[i];
+ /// if (c[i] == 0)
+ /// break;
+ /// }
+ ///
+ /// We have both a load and a store operation occurring before the condition
+ /// is checked for early termination. We could potentially restrict
+ /// vectorization to cases where we know all addresses are guaranteed to be
+ /// dereferenceable, which would allow the load before the condition check to
+ /// be vectorized.
+ ///
+ /// The store, however, should not execute across all lanes if early
+ /// termination occurs before the end of the vector. We must only store to the
+ /// locations that would have been stored to by a scalar loop. So we need to
+ /// know what the result of 'c[i] == 0' is before performing the vector store,
+ /// with or without masking.
+ ///
+ /// We can either do this by moving the condition load to the top of the
+ /// vector body and using the comparison to create masks for other operations
+ /// in the loop, or by looking ahead one vector iteration and bailing out to
+ /// the scalar loop if an exit would occur.
+ ///
+ /// Using the latter approach (applicable to more targets), we need to hoist
+ /// the first load (of c[0]) out of the loop then rotate the load within the
+ /// loop to the next iteration, remembering to adjust the vector trip count.
+ /// Something like the following:
+ ///
+ /// vec.ph:
+ /// %ci.0 = load <4 x i32>, ptr %c
+ /// %cmp.0 = icmp eq <4 x i32> %ci.0, zeroinitializer
+ /// %any.of.0 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp.0)
+ /// br i1 %any.of.0, label %scalar.ph, label %vec.body
+ /// vec.body:
+ /// %iv = phi...
+ /// phi for c[i] if used elsewhere in the loop...
+ /// other operations in the loop...
+ /// %iv.next = add i64 %iv, 4
+ /// %addr.next = getelementptr i32, ptr %c, i64 %iv.next
+ /// %ci.next = load <4 x i32>, ptr %addr.next
+ /// %cmp.next = icmp eq <4 x i32> %ci.next, zeroinitializer
+ /// %any.of.next = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp.next)
+ /// iv.next compared with shortened vector tripcount...
+ /// uncountable condition combined with counted condition...
+ /// br...
+ ///
+ /// Doing this means the last few iterations will always be performed by a
+ /// scalar loop regardless of which exit is taken, and so vector iterations
+ /// will never execute a memory operation to a location that the scalar loop
+ /// would not have.
+ ///
+ /// This means we must ensure that it is safe to move the load for 'c[i]'
+ /// before other memory operations (or any other observable side effects) in
+ /// the loop.
+ ///
+ /// Currently, c[i] must have only one user (the comparison used for the
+ /// uncountable exit) since we would otherwise need to introduce a PHI node
+ /// for it.
+ bool canUncountableExitConditionLoadBeMoved(BasicBlock *ExitingBlock);
+
/// Return true if all of the instructions in the block can be speculatively
/// executed, and record the loads/stores that require masking.
/// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -646,6 +724,10 @@ class LoopVectorizationLegality {
BlockFrequencyInfo *BFI;
ProfileSummaryInfo *PSI;
+ // Alias Analysis results used to check for possible aliasing with loads
+ // used in uncountable exit conditions.
+ AAResults *AA;
+
/// If we discover function calls within the loop which have a valid
/// vectorized variant, record that fact so that LoopVectorize can
/// (potentially) make a better decision on the maximum VF and enable
@@ -659,6 +741,10 @@ class LoopVectorizationLegality {
/// Keep track of an uncountable exiting block, if there is exactly one early
/// exit.
BasicBlock *UncountableExitingBB = nullptr;
+
+ /// If true, the loop has at least one uncountable exit and operations within
+ /// the loop may have observable side effects.
+ bool UncountableExitWithSideEffects = false;
};
} // namespace llvm
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
index db1971aca4bff..bdc2a0dad8622 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -152,6 +152,7 @@ struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
LoopAccessInfoManager *LAIs;
OptimizationRemarkEmitter *ORE;
ProfileSummaryInfo *PSI;
+ AAResults *AA;
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
LLVM_ABI void
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 528ae3822d078..2704e66f3a703 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -15,8 +15,10 @@
//
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -1223,8 +1225,18 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
});
}
- if (!LAI->canVectorizeMemory())
+ if (!LAI->canVectorizeMemory()) {
+ if (hasUncountableExitWithSideEffects()) {
+ reportVectorizationFailure(
+ "Cannot vectorize unsafe dependencies in uncountable exit loop with "
+ "side effects",
+ "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
+ TheLoop);
+ return false;
+ }
+
return canVectorizeIndirectUnsafeDependences();
+ }
if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
reportVectorizationFailure("We don't allow storing to uniform addresses",
@@ -1755,16 +1767,24 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
}
};
+ bool HasSideEffects = false;
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
if (I.mayWriteToMemory()) {
- // We don't support writes to memory.
+ if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
+ HasSideEffects = true;
+ continue;
+ }
+
+ // We don't support complex writes to memory.
reportVectorizationFailure(
- "Writes to memory unsupported in early exit loops",
- "Cannot vectorize early exit loop with writes to memory",
+ "Complex writes to memory unsupported in early exit loops",
+ "Cannot vectorize early exit loop with complex writes to memory",
"WritesInEarlyExitLoop", ORE, TheLoop);
return false;
- } else if (!IsSafeOperation(&I)) {
+ }
+
+ if (!IsSafeOperation(&I)) {
reportVectorizationFailure("Early exit loop contains operations that "
"cannot be speculatively executed",
"UnsafeOperationsEarlyExitLoop", ORE,
@@ -1777,15 +1797,22 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
"Expected latch predecessor to be the early exiting block");
- Predicates.clear();
SmallVector<LoadInst *, 4> NonDerefLoads;
- if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
- &Predicates)) {
- reportVectorizationFailure("Loop may fault",
- "Cannot vectorize non-read-only early exit loop",
- "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
+ // TODO: Handle loops that may fault.
+ if (!HasSideEffects) {
+ // Read-only loop.
+ Predicates.clear();
+ if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
+ &Predicates)) {
+ reportVectorizationFailure(
+ "Loop may fault", "Cannot vectorize non-read-only early exit loop",
+ "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+ } else if (!canUncountableExitConditionLoadBeMoved(
+ SingleUncountableExitingBlock))
return false;
- }
+
// Check non-dereferenceable loads if any.
for (LoadInst *LI : NonDerefLoads) {
// Only support unit-stride access for now.
@@ -1813,6 +1840,99 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
"backedge taken count: "
<< *SymbolicMaxBTC << '\n');
UncountableExitingBB = SingleUncountableExitingBlock;
+ UncountableExitWithSideEffects = HasSideEffects;
+ return true;
+}
+
+bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
+ BasicBlock *ExitingBlock) {
+ // Try to find a load in the critical path for the uncountable exit condition.
+ // This is currently matching about the simplest form we can, expecting
+ // only one in-loop load, the result of which is directly compared against
+ // a loop-invariant value.
+ // FIXME: We're insisting on a single use for now, because otherwise we will
+ // need to make PHI nodes for other users. That can be done once the initial
+ // transform code lands.
+ auto *Br = cast<BranchInst>(ExitingBlock->getTerminator());
+
+ using namespace llvm::PatternMatch;
+ Instruction *L = nullptr;
+ Value *Ptr = nullptr;
+ Value *R = nullptr;
+ if (!match(Br->getCondition(),
+ m_OneUse(m_ICmp(m_OneUse(m_Instruction(L, m_Load(m_Value(Ptr)))),
+ m_Value(R))))) {
+ reportVectorizationFailure(
+ "Early exit loop with store but no supported condition load",
+ "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
+ // FIXME: Don't rely on operand ordering for the comparison.
+ if (!TheLoop->isLoopInvariant(R)) {
+ reportVectorizationFailure(
+ "Early exit loop with store but no supported condition load",
+ "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
+ // Make sure that the load address is not loop invariant; we want an
+ // address calculation that we can rotate to the next vector iteration.
+ const SCEV *PtrScev = PSE.getSE()->getSCEV(Ptr);
+ if (!isa<SCEVAddRecExpr>(PtrScev)) {
+ reportVectorizationFailure(
+ "Uncountable exit condition depends on load with an address that is "
+ "not an add recurrence",
+ "EarlyExitLoadInvariantAddress", ORE, TheLoop);
+ return false;
+ }
+
+ // FIXME: Support gathers after first-faulting load support lands.
+ SmallVector<const SCEVPredicate *, 4> Predicates;
+ LoadInst *Load = cast<LoadInst>(L);
+ if (!isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
+ &Predicates)) {
+ reportVectorizationFailure(
+ "Loop may fault",
+ "Cannot vectorize potentially faulting early exit loop",
+ "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
+ ICFLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(TheLoop);
+ // We need to know that load will be executed before we can hoist a
+ // copy out to run just before the first iteration.
+ // FIXME: Currently, other restrictions prevent us from reaching this point
+ // with a loop where the uncountable exit condition is determined
+ // by a conditional load.
+ assert(SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop) &&
+ "Unhandled control flow in uncountable exit loop with side effects");
+
+ // Prohibit any potential aliasing with any instruction in the loop which
+ // might store to memory.
+ // FIXME: Relax this constraint where possible.
+ for (auto *BB : TheLoop->blocks()) {
+ for (auto &I : *BB) {
+ if (&I == Load)
+ continue;
+
+ if (I.mayWriteToMemory()) {
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
+ if (AR == AliasResult::NoAlias)
+ continue;
+ }
+
+ reportVectorizationFailure(
+ "Cannot determine whether critical uncountable exit load address "
+ "does not alias with a memory write",
+ "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
+ return false;
+ }
+ }
+ }
+
return true;
}
@@ -1885,6 +2005,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
} else {
if (!isVectorizableEarlyExitLoop()) {
assert(!hasUncountableEarlyExit() &&
+ !hasUncountableExitWithSideEffects() &&
"Must be false without vectorizable early-exit loop");
if (DoExtraAnalysis)
Result = false;
@@ -1903,6 +2024,15 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return false;
}
+ // Bail out for state-changing loops with uncountable exits for now.
+ if (UncountableExitWithSideEffects) {
+ reportVectorizationFailure(
+ "Writes to memory unsupported in early exit loops",
+ "Cannot vectorize early exit loop with writes to memory",
+ "WritesInEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
if (Result) {
LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
<< (LAI->getRuntimePointerChecking()->Need
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6c96214cea13c..3cff43a510298 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9810,7 +9810,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check if it is legal to vectorize the loop.
LoopVectorizationRequirements Requirements;
LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, F, *LAIs, LI, ORE,
- &Requirements, &Hints, DB, AC, BFI, PSI);
+ &Requirements, &Hints, DB, AC, BFI, PSI, AA);
if (!LVL.canVectorize(EnableVPlanNativePath)) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
Hints.emitRemarkWithHints();
@@ -10247,6 +10247,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
DB = &AM.getResult<DemandedBitsAnalysis>(F);
ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
LAIs = &AM.getResult<LoopAccessAnalysis>(F);
+ AA = &AM.getResult<AAManager>(F);
auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index 3a8aec34dfe43..61836e4a29d58 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -10,7 +10,7 @@
; return 0;
; }
-; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with writes to memory
+; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize potentially faulting early exit loop
; CHECK: remark: source.cpp:5:9: loop not vectorized
; CHECK: _Z4testPii
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 84d5ceeb601b6..82b44adc6df77 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -3,7 +3,7 @@
define i64 @loop_contains_store(ptr %dest) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops
+; CHECK: LV: Not vectorizing: Early exit loop with store but no supported condition load.
entry:
%p1 = alloca [1024 x i8]
call void @init_mem(ptr %p1, i64 1024)
@@ -56,7 +56,7 @@ exit:
define void @loop_contains_store_ee_condition_is_invariant(ptr dereferenceable(40) noalias %array, i16 %ee.val) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_ee_condition_is_invariant'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Early exit loop with store but no supported condition load.
entry:
br label %for.body
@@ -80,7 +80,7 @@ exit:
define void @loop_contains_store_fcmp_condition(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_fcmp_condition'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Early exit loop with store but no supported condition load.
entry:
br label %for.body
@@ -106,7 +106,7 @@ exit:
define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(96) %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Cannot determine whether critical uncountable exit load address does not alias with a memory write.
entry:
%pred.plus.8 = getelementptr inbounds nuw i16, ptr %pred, i64 8
br label %for.body
@@ -135,7 +135,7 @@ exit:
define void @loop_contains_store_unsafe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unsafe_dependency'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Loop may fault.
entry:
%unknown.offset = call i64 @get_an_unknown_offset()
%unknown.cmp = icmp ult i64 %unknown.offset, 20
@@ -149,10 +149,10 @@ for.body:
%data = load i16, ptr %st.addr, align 2
%inc = add nsw i16 %data, 1
store i16 %inc, ptr %st.addr, align 2
- %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv
%ee.val = load i16, ptr %ee.addr, align 2
%ee.cond = icmp sgt i16 %ee.val, 500
- %some.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv
+ %some.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
store i16 42, ptr %some.addr, align 2
br i1 %ee.cond, label %exit, label %for.inc
@@ -167,7 +167,7 @@ exit:
define void @loop_contains_store_assumed_bounds(ptr noalias %array, ptr readonly %pred, i32 %n) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_assumed_bounds'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Loop may fault.
entry:
%n_bytes = mul nuw nsw i32 %n, 2
call void @llvm.assume(i1 true) [ "align"(ptr %pred, i64 2), "dereferenceable"(ptr %pred, i32 %n_bytes) ]
@@ -223,7 +223,7 @@ exit:
define void @loop_contains_store_unknown_bounds(ptr align 2 dereferenceable(100) noalias %array, ptr align 2 dereferenceable(100) readonly %pred, i64 %n) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unknown_bounds'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Loop may fault.
entry:
br label %for.body
@@ -249,7 +249,7 @@ exit:
define void @loop_contains_store_volatile(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_volatile'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Complex writes to memory unsupported in early exit loops.
entry:
br label %for.body
@@ -327,7 +327,7 @@ exit:
define void @loop_contains_store_requiring_alias_check(ptr dereferenceable(40) %array, ptr align 2 dereferenceable(40) %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_requiring_alias_check'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Cannot determine whether critical uncountable exit load address does not alias with a memory write.
entry:
br label %for.body
@@ -353,7 +353,7 @@ exit:
define void @loop_contains_store_condition_load_is_chained(ptr dereferenceable(40) noalias %array, ptr align 8 dereferenceable(160) readonly %offsets, ptr align 2 dereferenceable(40) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_condition_load_is_chained'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK: LV: Not vectorizing: Uncountable exit condition depends on load with an address that is not an add recurrence.
entry:
br label %for.body
@@ -405,5 +405,167 @@ exit:
ret void
}
+define void @loop_contains_store_condition_load_requires_gather(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(512) readonly %pred, ptr align 1 dereferenceable(20) readonly %offsets) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_condition_load_requires_gather'
+; CHECK: LV: Not vectorizing: Uncountable exit condition depends on load with an address that is not an add recurrence.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %offset.addr = getelementptr inbounds nuw i8, ptr %offsets, i64 %iv
+ %offset = load i8, ptr %offset.addr, align 1
+ %offset.zext = zext i8 %offset to i64
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %offset.zext
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_uncounted_exit_is_a_switch(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_uncounted_exit_is_a_switch'
+; CHECK: LV: Not vectorizing: Loop contains an unsupported switch
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ switch i16 %ee.val, label %for.inc [ i16 500, label %exit ]
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_uncounted_exit_is_not_guaranteed_to_execute(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_uncounted_exit_is_not_guaranteed_to_execute'
+; CHECK: LV: Not vectorizing: Early exit is not the latch predecessor.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %rem = urem i64 %iv, 5
+ %skip.ee.cmp = icmp eq i64 %rem, 0
+ br i1 %skip.ee.cmp, label %for.inc, label %ee.block
+
+ee.block:
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @test_nodep(ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'test_nodep'
+; CHECK: LV: Not vectorizing: Cannot determine whether critical uncountable exit load address does not alias with a memory write.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ store i16 0, ptr %st.addr, align 2
+ %ee.val = load i16, ptr %st.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @histogram_with_uncountable_exit(ptr noalias %buckets, ptr readonly %indices, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'histogram_with_uncountable_exit'
+; CHECK: LV: Not vectorizing: Cannot vectorize unsafe dependencies in uncountable exit loop with side effects.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
+ %l.idx = load i32, ptr %gep.indices, align 4
+ %idxprom1 = zext i32 %l.idx to i64
+ %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
+ %l.bucket = load i32, ptr %gep.bucket, align 4
+ %inc = add nsw i32 %l.bucket, 1
+ store i32 %inc, ptr %gep.bucket, align 4
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @uncountable_exit_condition_address_is_invariant(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(2) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_condition_address_is_invariant'
+; CHECK: LV: Not vectorizing: Uncountable exit condition depends on load with an address that is not an add recurrence.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.val = load i16, ptr %pred, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
declare void @init_mem(ptr, i64);
declare i64 @get_an_unknown_offset();
More information about the llvm-commits
mailing list