[llvm] [LV] Move dereferenceability check from Legal to VPlan (NFC) (PR #185323)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 8 14:53:18 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Instead of checking dereferenceability early during LoopVectorizationLegality, defer the check to VPlan construction via areAllLoadsDereferenceable.
This is in preparation for supporting early-exit vectorization of non-dereferenceable loads, e.g. via speculative loads (https://discourse.llvm.org/t/rfc-provide-intrinsics-for-speculative-loads/89692) or first-faulting loads. Performing the detection in VPlan makes it easy to replace potentially non-dereferenceable loads with other kinds of loads as needed.
---
Full diff: https://github.com/llvm/llvm-project/pull/185323.diff
8 Files Affected:
- (modified) llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h (-9)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (+1-15)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+8-10)
- (modified) llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp (+61-5)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+7-3)
- (modified) llvm/test/Transforms/LoopVectorize/control-flow.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/early_exit_legality.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll (+3-3)
``````````diff
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index f82fc588639dd..f4650dd7a28e4 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -457,12 +457,6 @@ class LoopVectorizationLegality {
/// Returns a list of all known histogram operations in the loop.
bool hasHistograms() const { return !Histograms.empty(); }
- /// Returns potentially faulting loads.
- const SmallPtrSetImpl<const Instruction *> &
- getPotentiallyFaultingLoads() const {
- return PotentiallyFaultingLoads;
- }
-
PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
return &PSE;
}
@@ -718,9 +712,6 @@ class LoopVectorizationLegality {
/// may work on the same memory location.
SmallVector<HistogramInfo, 1> Histograms;
- /// Hold potentially faulting loads.
- SmallPtrSet<const Instruction *, 4> PotentiallyFaultingLoads;
-
/// Whether or not creating SCEV predicates is allowed.
bool AllowRuntimeSCEVChecks;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 05f1dc18881df..c0d144d18a42f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1830,9 +1830,6 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
"EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
return false;
}
- PotentiallyFaultingLoads.insert(LI);
- LLVM_DEBUG(dbgs() << "LV: Found potentially faulting load: " << *LI
- << "\n");
}
[[maybe_unused]] const SCEV *SymbolicMaxBTC =
@@ -1892,20 +1889,9 @@ bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
return false;
}
- // FIXME: Support gathers after first-faulting load support lands.
- SmallVector<const SCEVPredicate *, 4> Predicates;
- LoadInst *Load = cast<LoadInst>(L);
- if (!isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
- &Predicates)) {
- reportVectorizationFailure(
- "Loop may fault",
- "Cannot vectorize potentially faulting early exit loop",
- "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
- return false;
- }
-
ICFLoopSafetyInfo SafetyInfo;
SafetyInfo.computeLoopSafetyInfo(TheLoop);
+ LoadInst *Load = cast<LoadInst>(L);
// We need to know that load will be executed before we can hoist a
// copy out to run just before the first iteration.
if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 420b111bafc72..96ec27d5d1003 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8206,7 +8206,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
return !CM.requiresScalarEpilogue(VF.isVector());
},
Range);
- VPlanTransforms::handleEarlyExits(*Plan, Legal->hasUncountableEarlyExit());
+ if (!VPlanTransforms::handleEarlyExits(
+ *Plan, Legal->hasUncountableEarlyExit(), OrigLoop, PSE, *DT,
+ Legal->getAssumptionCache()))
+ return nullptr;
VPlanTransforms::addMiddleCheck(*Plan, RequiresScalarEpilogueCheck,
CM.foldTailByMasking());
@@ -8456,8 +8459,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
MapVector<PHINode *, RecurrenceDescriptor>(),
SmallPtrSet<const PHINode *, 1>(), SmallPtrSet<PHINode *, 1>(),
/*AllowReordering=*/false);
- VPlanTransforms::handleEarlyExits(*Plan,
- /*HasUncountableExit*/ false);
+ if (!VPlanTransforms::handleEarlyExits(*Plan,
+ /*HasUncountableExit*/ false, OrigLoop,
+ PSE, *DT, Legal->getAssumptionCache()))
+ return nullptr;
VPlanTransforms::addMiddleCheck(*Plan, /*RequiresScalarEpilogue*/ true,
/*TailFolded*/ false);
@@ -9542,13 +9547,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
}
}
- if (!LVL.getPotentiallyFaultingLoads().empty()) {
- reportVectorizationFailure("Auto-vectorization of loops with potentially "
- "faulting load is not supported",
- "PotentiallyFaultingLoadsNotSupported", ORE, L);
- return false;
- }
-
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
// even evaluating whether vectorization is profitable. Since we cannot modify
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index b9f8cb3da24ff..883558e4ca3c7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -20,6 +20,7 @@
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -28,6 +29,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
@@ -941,17 +943,70 @@ void VPlanTransforms::createInLoopReductionRecipes(
R->eraseFromParent();
}
-void VPlanTransforms::handleEarlyExits(VPlan &Plan,
- bool HasUncountableEarlyExit) {
+/// Check if all loads in the loop are dereferenceable. Iterates over all blocks
+/// reachable from \p HeaderVPBB, skipping \p MiddleVPBB. Returns false if any
+/// non-dereferenceable load is found.
+static bool areAllLoadsDereferenceable(VPBasicBlock *HeaderVPBB,
+ VPBasicBlock *MiddleVPBB, Loop *TheLoop,
+ PredicatedScalarEvolution &PSE,
+ DominatorTree &DT, AssumptionCache *AC) {
+ ScalarEvolution &SE = *PSE.getSE();
+ const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(HeaderVPBB))) {
+ // Skip blocks outside the loop (exit blocks and their successors).
+ if (VPBB == MiddleVPBB)
+ continue;
+ for (VPRecipeBase &R : *VPBB) {
+ auto *VPI = dyn_cast<VPInstructionWithType>(&R);
+ if (!VPI || VPI->getOpcode() != Instruction::Load)
+ continue;
+
+ // Get the pointer SCEV for dereferenceability checking.
+ VPValue *Ptr = VPI->getOperand(0);
+ const SCEV *PtrSCEV = vputils::getSCEVExprForVPValue(Ptr, PSE, TheLoop);
+ if (isa<SCEVCouldNotCompute>(PtrSCEV)) {
+ LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Found non-dereferenceable "
+ "load with SCEVCouldNotCompute pointer\n");
+ return false;
+ }
+
+ // Check dereferenceability using the SCEV-based version.
+ Type *LoadTy = VPI->getResultType();
+ const SCEV *SizeSCEV =
+ SE.getStoreSizeOfExpr(DL.getIndexType(PtrSCEV->getType()), LoadTy);
+ auto *Load = cast<LoadInst>(VPI->getUnderlyingValue());
+ SmallVector<const SCEVPredicate *> Preds;
+ if (isDereferenceableAndAlignedInLoop(PtrSCEV, Load->getAlign(), SizeSCEV,
+ TheLoop, SE, DT, AC, &Preds))
+ continue;
+
+ LLVM_DEBUG(
+ dbgs() << "LV: Not vectorizing: Auto-vectorization of loops with "
+ "potentially faulting load is not supported.\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+bool VPlanTransforms::handleEarlyExits(VPlan &Plan,
+ bool HasUncountableEarlyExit,
+ Loop *TheLoop,
+ PredicatedScalarEvolution &PSE,
+ DominatorTree &DT, AssumptionCache *AC) {
auto *MiddleVPBB = cast<VPBasicBlock>(
Plan.getScalarHeader()->getSinglePredecessor()->getPredecessors()[0]);
auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
if (HasUncountableEarlyExit) {
- handleUncountableEarlyExits(Plan, cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
- MiddleVPBB);
- return;
+ auto *HeaderVPBB = cast<VPBasicBlock>(HeaderVPB);
+ if (!areAllLoadsDereferenceable(HeaderVPBB, MiddleVPBB, TheLoop, PSE, DT,
+ AC))
+ return false;
+ handleUncountableEarlyExits(Plan, HeaderVPBB, LatchVPBB, MiddleVPBB);
+ return true;
}
// Disconnect countable early exits from the loop, leaving it with a single
@@ -969,6 +1024,7 @@ void VPlanTransforms::handleEarlyExits(VPlan &Plan,
VPBlockUtils::disconnectBlocks(Pred, EB);
}
}
+ return true;
}
void VPlanTransforms::addMiddleCheck(VPlan &Plan,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 45e7be3169a52..e4da964d0fc72 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -149,9 +149,13 @@ struct VPlanTransforms {
VPlan &Plan, const DenseSet<BasicBlock *> &BlocksNeedingPredication,
ElementCount MinVF);
- /// Update \p Plan to account for all early exits.
- LLVM_ABI_FOR_TEST static void handleEarlyExits(VPlan &Plan,
- bool HasUncountableExit);
+ /// Update \p Plan to account for all early exits. If \p HasUncountableExit
+ /// is true, handles uncountable early exits and checks that all loads are
+ /// dereferenceable. Returns false if a non-dereferenceable load is found.
+ LLVM_ABI_FOR_TEST static bool
+ handleEarlyExits(VPlan &Plan, bool HasUncountableExit, Loop *TheLoop,
+ PredicatedScalarEvolution &PSE, DominatorTree &DT,
+ AssumptionCache *AC);
/// If a check is needed to guard executing the scalar epilogue loop, it will
/// be added to the middle block.
diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index 61836e4a29d58..2ff2add30b436 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -10,7 +10,7 @@
; return 0;
; }
-; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize potentially faulting early exit loop
+; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot determine whether critical uncountable exit load address does not alias with a memory write
; CHECK: remark: source.cpp:5:9: loop not vectorized
; CHECK: _Z4testPii
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
index 5d9f01368934d..64529606779cc 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; REQUIRES: asserts
-; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 -disable-output 2>&1 | FileCheck %s
+; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize,vplan -force-vector-width=4 -disable-output 2>&1 | FileCheck %s
declare void @init_mem(ptr, i64);
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 73f5c66fe8d0d..a46934f7b4a07 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -141,7 +141,7 @@ exit:
;; Possibly vectorizeable, but would require some runtime checks.
define void @loop_contains_store_unsafe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unsafe_dependency'
-; CHECK: LV: Not vectorizing: Loop may fault.
+; CHECK: LV: Not vectorizing: Cannot determine whether critical uncountable exit load address does not alias with a memory write.
entry:
%unknown.offset = call i64 @get_an_unknown_offset()
%unknown.cmp = icmp ult i64 %unknown.offset, 20
@@ -176,7 +176,7 @@ exit:
;; Alternatively, we could use masked.load.ff or vp.load.ff
define void @loop_contains_store_assumed_bounds(ptr noalias %array, ptr readonly %pred, i64 %n) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_assumed_bounds'
-; CHECK: LV: Not vectorizing: Loop may fault.
+; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
entry:
%n_bytes = mul nuw nsw i64 %n, 2
call void @llvm.assume(i1 true) [ "align"(ptr %pred, i64 2), "dereferenceable"(ptr %pred, i64 %n_bytes) ]
@@ -232,7 +232,7 @@ exit:
;; Vectorizeable, requires runtime checks and/or ff loads.
define void @loop_contains_store_unknown_bounds(ptr align 2 dereferenceable(100) noalias %array, ptr align 2 dereferenceable(100) readonly %pred, i64 %n) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unknown_bounds'
-; CHECK: LV: Not vectorizing: Loop may fault.
+; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
entry:
br label %for.body
``````````
</details>
https://github.com/llvm/llvm-project/pull/185323
More information about the llvm-commits
mailing list