[llvm] [LV] Move dereferenceability check from Legal to VPlan (NFC) (PR #185323)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 8 14:52:44 PDT 2026
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/185323
Instead of checking dereferenceability early during LoopVectorizationLegality, defer the check to VPlan construction via areAllLoadsDereferenceable.
This is in preparation for supporting early exit vectorization of non-dereferenceable loads, e.g. via speculative loads (https://discourse.llvm.org/t/rfc-provide-intrinsics-for-speculative-loads/89692) or first-faulting loads. Detection in VPlan allows easily replacing potentially non-dereferenceable loads with other loads as needed.
>From eadbf4ed1ac01392663a1cb6c3f02f65e1576141 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 8 Mar 2026 21:51:53 +0000
Subject: [PATCH] [LV] Move dereferenceability check from Legal to VPlan (NFC)
Instead of checking dereferenceability early during
LoopVectorizationLegality, defer the check to VPlan construction via
areAllLoadsDereferenceable.
This is in preparation for supporting early exit vectorization of
non-dereferenceable loads, e.g. via speculative loads
(https://discourse.llvm.org/t/rfc-provide-intrinsics-for-speculative-loads/89692)
or first-faulting loads. Detection in VPlan allows easily replacing
potentially non-deref loads with other loads as needed.
---
.../Vectorize/LoopVectorizationLegality.h | 9 ---
.../Vectorize/LoopVectorizationLegality.cpp | 16 +----
.../Transforms/Vectorize/LoopVectorize.cpp | 18 +++--
.../Vectorize/VPlanConstruction.cpp | 66 +++++++++++++++++--
.../Transforms/Vectorize/VPlanTransforms.h | 10 ++-
.../Transforms/LoopVectorize/control-flow.ll | 2 +-
.../LoopVectorize/early_exit_legality.ll | 2 +-
.../early_exit_store_legality.ll | 6 +-
8 files changed, 82 insertions(+), 47 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index f82fc588639dd..f4650dd7a28e4 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -457,12 +457,6 @@ class LoopVectorizationLegality {
/// Returns a list of all known histogram operations in the loop.
bool hasHistograms() const { return !Histograms.empty(); }
- /// Returns potentially faulting loads.
- const SmallPtrSetImpl<const Instruction *> &
- getPotentiallyFaultingLoads() const {
- return PotentiallyFaultingLoads;
- }
-
PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
return &PSE;
}
@@ -718,9 +712,6 @@ class LoopVectorizationLegality {
/// may work on the same memory location.
SmallVector<HistogramInfo, 1> Histograms;
- /// Hold potentially faulting loads.
- SmallPtrSet<const Instruction *, 4> PotentiallyFaultingLoads;
-
/// Whether or not creating SCEV predicates is allowed.
bool AllowRuntimeSCEVChecks;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 05f1dc18881df..c0d144d18a42f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1830,9 +1830,6 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
"EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
return false;
}
- PotentiallyFaultingLoads.insert(LI);
- LLVM_DEBUG(dbgs() << "LV: Found potentially faulting load: " << *LI
- << "\n");
}
[[maybe_unused]] const SCEV *SymbolicMaxBTC =
@@ -1892,20 +1889,9 @@ bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
return false;
}
- // FIXME: Support gathers after first-faulting load support lands.
- SmallVector<const SCEVPredicate *, 4> Predicates;
- LoadInst *Load = cast<LoadInst>(L);
- if (!isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
- &Predicates)) {
- reportVectorizationFailure(
- "Loop may fault",
- "Cannot vectorize potentially faulting early exit loop",
- "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
- return false;
- }
-
ICFLoopSafetyInfo SafetyInfo;
SafetyInfo.computeLoopSafetyInfo(TheLoop);
+ LoadInst *Load = cast<LoadInst>(L);
// We need to know that load will be executed before we can hoist a
// copy out to run just before the first iteration.
if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 420b111bafc72..96ec27d5d1003 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8206,7 +8206,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
return !CM.requiresScalarEpilogue(VF.isVector());
},
Range);
- VPlanTransforms::handleEarlyExits(*Plan, Legal->hasUncountableEarlyExit());
+ if (!VPlanTransforms::handleEarlyExits(
+ *Plan, Legal->hasUncountableEarlyExit(), OrigLoop, PSE, *DT,
+ Legal->getAssumptionCache()))
+ return nullptr;
VPlanTransforms::addMiddleCheck(*Plan, RequiresScalarEpilogueCheck,
CM.foldTailByMasking());
@@ -8456,8 +8459,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
MapVector<PHINode *, RecurrenceDescriptor>(),
SmallPtrSet<const PHINode *, 1>(), SmallPtrSet<PHINode *, 1>(),
/*AllowReordering=*/false);
- VPlanTransforms::handleEarlyExits(*Plan,
- /*HasUncountableExit*/ false);
+ if (!VPlanTransforms::handleEarlyExits(*Plan,
+ /*HasUncountableExit*/ false, OrigLoop,
+ PSE, *DT, Legal->getAssumptionCache()))
+ return nullptr;
VPlanTransforms::addMiddleCheck(*Plan, /*RequiresScalarEpilogue*/ true,
/*TailFolded*/ false);
@@ -9542,13 +9547,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
}
}
- if (!LVL.getPotentiallyFaultingLoads().empty()) {
- reportVectorizationFailure("Auto-vectorization of loops with potentially "
- "faulting load is not supported",
- "PotentiallyFaultingLoadsNotSupported", ORE, L);
- return false;
- }
-
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
// even evaluating whether vectorization is profitable. Since we cannot modify
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index b9f8cb3da24ff..883558e4ca3c7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -20,6 +20,7 @@
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -28,6 +29,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
@@ -941,17 +943,70 @@ void VPlanTransforms::createInLoopReductionRecipes(
R->eraseFromParent();
}
-void VPlanTransforms::handleEarlyExits(VPlan &Plan,
- bool HasUncountableEarlyExit) {
+/// Check if all loads in the loop are dereferenceable. Iterates over all blocks
+/// reachable from \p HeaderVPBB, skipping \p MiddleVPBB. Returns false if any
+/// non-dereferenceable load is found.
+static bool areAllLoadsDereferenceable(VPBasicBlock *HeaderVPBB,
+ VPBasicBlock *MiddleVPBB, Loop *TheLoop,
+ PredicatedScalarEvolution &PSE,
+ DominatorTree &DT, AssumptionCache *AC) {
+ ScalarEvolution &SE = *PSE.getSE();
+ const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(HeaderVPBB))) {
+ // Skip blocks outside the loop (exit blocks and their successors).
+ if (VPBB == MiddleVPBB)
+ continue;
+ for (VPRecipeBase &R : *VPBB) {
+ auto *VPI = dyn_cast<VPInstructionWithType>(&R);
+ if (!VPI || VPI->getOpcode() != Instruction::Load)
+ continue;
+
+ // Get the pointer SCEV for dereferenceability checking.
+ VPValue *Ptr = VPI->getOperand(0);
+ const SCEV *PtrSCEV = vputils::getSCEVExprForVPValue(Ptr, PSE, TheLoop);
+ if (isa<SCEVCouldNotCompute>(PtrSCEV)) {
+ LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Found non-dereferenceable "
+ "load with SCEVCouldNotCompute pointer\n");
+ return false;
+ }
+
+ // Check dereferenceability using the SCEV-based version.
+ Type *LoadTy = VPI->getResultType();
+ const SCEV *SizeSCEV =
+ SE.getStoreSizeOfExpr(DL.getIndexType(PtrSCEV->getType()), LoadTy);
+ auto *Load = cast<LoadInst>(VPI->getUnderlyingValue());
+ SmallVector<const SCEVPredicate *> Preds;
+ if (isDereferenceableAndAlignedInLoop(PtrSCEV, Load->getAlign(), SizeSCEV,
+ TheLoop, SE, DT, AC, &Preds))
+ continue;
+
+ LLVM_DEBUG(
+ dbgs() << "LV: Not vectorizing: Auto-vectorization of loops with "
+ "potentially faulting load is not supported.\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+bool VPlanTransforms::handleEarlyExits(VPlan &Plan,
+ bool HasUncountableEarlyExit,
+ Loop *TheLoop,
+ PredicatedScalarEvolution &PSE,
+ DominatorTree &DT, AssumptionCache *AC) {
auto *MiddleVPBB = cast<VPBasicBlock>(
Plan.getScalarHeader()->getSinglePredecessor()->getPredecessors()[0]);
auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
if (HasUncountableEarlyExit) {
- handleUncountableEarlyExits(Plan, cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
- MiddleVPBB);
- return;
+ auto *HeaderVPBB = cast<VPBasicBlock>(HeaderVPB);
+ if (!areAllLoadsDereferenceable(HeaderVPBB, MiddleVPBB, TheLoop, PSE, DT,
+ AC))
+ return false;
+ handleUncountableEarlyExits(Plan, HeaderVPBB, LatchVPBB, MiddleVPBB);
+ return true;
}
// Disconnect countable early exits from the loop, leaving it with a single
@@ -969,6 +1024,7 @@ void VPlanTransforms::handleEarlyExits(VPlan &Plan,
VPBlockUtils::disconnectBlocks(Pred, EB);
}
}
+ return true;
}
void VPlanTransforms::addMiddleCheck(VPlan &Plan,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 45e7be3169a52..e4da964d0fc72 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -149,9 +149,13 @@ struct VPlanTransforms {
VPlan &Plan, const DenseSet<BasicBlock *> &BlocksNeedingPredication,
ElementCount MinVF);
- /// Update \p Plan to account for all early exits.
- LLVM_ABI_FOR_TEST static void handleEarlyExits(VPlan &Plan,
- bool HasUncountableExit);
+ /// Update \p Plan to account for all early exits. If \p HasUncountableExit
+ /// is true, handles uncountable early exits and checks that all loads are
+ /// dereferenceable. Returns false if a non-dereferenceable load is found.
+ LLVM_ABI_FOR_TEST static bool
+ handleEarlyExits(VPlan &Plan, bool HasUncountableExit, Loop *TheLoop,
+ PredicatedScalarEvolution &PSE, DominatorTree &DT,
+ AssumptionCache *AC);
/// If a check is needed to guard executing the scalar epilogue loop, it will
/// be added to the middle block.
diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index 61836e4a29d58..2ff2add30b436 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -10,7 +10,7 @@
; return 0;
; }
-; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize potentially faulting early exit loop
+; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot determine whether critical uncountable exit load address does not alias with a memory write
; CHECK: remark: source.cpp:5:9: loop not vectorized
; CHECK: _Z4testPii
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
index 5d9f01368934d..64529606779cc 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; REQUIRES: asserts
-; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 -disable-output 2>&1 | FileCheck %s
+; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize,vplan -force-vector-width=4 -disable-output 2>&1 | FileCheck %s
declare void @init_mem(ptr, i64);
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 73f5c66fe8d0d..a46934f7b4a07 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -141,7 +141,7 @@ exit:
;; Possibly vectorizeable, but would require some runtime checks.
define void @loop_contains_store_unsafe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unsafe_dependency'
-; CHECK: LV: Not vectorizing: Loop may fault.
+; CHECK: LV: Not vectorizing: Cannot determine whether critical uncountable exit load address does not alias with a memory write.
entry:
%unknown.offset = call i64 @get_an_unknown_offset()
%unknown.cmp = icmp ult i64 %unknown.offset, 20
@@ -176,7 +176,7 @@ exit:
;; Alternatively, we could use masked.load.ff or vp.load.ff
define void @loop_contains_store_assumed_bounds(ptr noalias %array, ptr readonly %pred, i64 %n) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_assumed_bounds'
-; CHECK: LV: Not vectorizing: Loop may fault.
+; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
entry:
%n_bytes = mul nuw nsw i64 %n, 2
call void @llvm.assume(i1 true) [ "align"(ptr %pred, i64 2), "dereferenceable"(ptr %pred, i64 %n_bytes) ]
@@ -232,7 +232,7 @@ exit:
;; Vectorizeable, requires runtime checks and/or ff loads.
define void @loop_contains_store_unknown_bounds(ptr align 2 dereferenceable(100) noalias %array, ptr align 2 dereferenceable(100) readonly %pred, i64 %n) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unknown_bounds'
-; CHECK: LV: Not vectorizing: Loop may fault.
+; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
entry:
br label %for.body
More information about the llvm-commits
mailing list