[llvm] [RFC][LV] Add support for speculative loads in loops that may fault (PR #151300)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 20 00:54:40 PDT 2025
https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/151300
From 606cb13870973d4188d13499e500f551f0ba6d71 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Tue, 19 Aug 2025 18:45:15 -0700
Subject: [PATCH 1/3] [LV] Add initial legality checks for loops with unbound
loads.
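
For context, this is the shape of loop the series targets: a read-only search
loop whose only bound is a pointer compare, so the load cannot be proven
dereferenceable across a full vector iteration and must be treated as
speculative. A minimal IR sketch with illustrative names, closely mirroring the
single_unbound_access test added below:

  define ptr @search(ptr %first, ptr %last, i32 %value) {
  entry:
    %empty = icmp eq ptr %first, %last
    br i1 %empty, label %exit, label %loop

  loop:
    %ptr = phi ptr [ %first, %entry ], [ %ptr.next, %latch ]
    ; May fault on lanes past the match; not provably dereferenceable.
    %v = load i32, ptr %ptr, align 4
    %found = icmp eq i32 %v, %value
    br i1 %found, label %exit, label %latch

  latch:
    %ptr.next = getelementptr inbounds i32, ptr %ptr, i64 1
    %done = icmp eq ptr %ptr.next, %last
    br i1 %done, label %exit, label %loop

  exit:
    %res = phi ptr [ %first, %entry ], [ %ptr, %loop ], [ %last, %latch ]
    ret ptr %res
  }
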
---
llvm/include/llvm/Analysis/Loads.h | 6 +-
.../llvm/Analysis/TargetTransformInfo.h | 3 +
.../llvm/Analysis/TargetTransformInfoImpl.h | 4 +
.../Vectorize/LoopVectorizationLegality.h | 8 ++
llvm/lib/Analysis/Loads.cpp | 9 +-
llvm/lib/Analysis/TargetTransformInfo.cpp | 5 ++
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 12 +++
llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 +
.../Target/RISCV/RISCVTargetTransformInfo.h | 5 ++
.../Vectorize/LoopVectorizationLegality.cpp | 29 ++++++-
.../Transforms/Vectorize/LoopVectorize.cpp | 7 ++
.../RISCV/unbound-access-legality.ll | 84 +++++++++++++++++++
llvm/unittests/Analysis/LoadsTest.cpp | 5 +-
13 files changed, 172 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 84564563de8e3..7f28afafb3500 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -86,9 +86,11 @@ LLVM_ABI bool isDereferenceableAndAlignedInLoop(
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
/// Return true if the loop \p L cannot fault on any iteration and only
-/// contains read-only memory accesses.
-LLVM_ABI bool isDereferenceableReadOnlyLoop(
+/// contains read-only memory accesses. Also collect loads that are not
+/// guaranteed to be dereferenceable.
+LLVM_ABI bool isReadOnlyLoopWithSafeOrSpeculativeLoads(
Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ SmallVectorImpl<LoadInst *> *SpeculativeLoads,
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
/// Return true if we know that executing a load from this value cannot trap.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 1e03209e888bf..8d33aaca53a00 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -843,6 +843,9 @@ class TargetTransformInfo {
/// Return true if the target supports strided load.
LLVM_ABI bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const;
+ /// Return true if the target supports speculative load.
+ LLVM_ABI bool isLegalSpeculativeLoad(Type *DataType, Align Alignment) const;
+
/// Return true is the target supports interleaved access for the given vector
/// type \p VTy, interleave factor \p Factor, alignment \p Alignment and
/// address space \p AddrSpace.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 252acf381a8e1..3bcfd2b0f9308 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -374,6 +374,10 @@ class TargetTransformInfoImplBase {
return false;
}
+ virtual bool isLegalSpeculativeLoad(Type *DataType, Align Alignment) const {
+ return false;
+ }
+
virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
Align Alignment,
unsigned AddrSpace) const {
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 48ee93acbe008..727b663288aa2 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -445,6 +445,11 @@ class LoopVectorizationLegality {
/// Returns a list of all known histogram operations in the loop.
bool hasHistograms() const { return !Histograms.empty(); }
+ /// Returns the loads that may fault and need to be speculative.
+ const SmallPtrSetImpl<const Instruction *> &getSpeculativeLoads() const {
+ return SpeculativeLoads;
+ }
+
PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
return &PSE;
}
@@ -633,6 +638,9 @@ class LoopVectorizationLegality {
/// may work on the same memory location.
SmallVector<HistogramInfo, 1> Histograms;
+ /// Hold all loads that need to be speculative.
+ SmallPtrSet<const Instruction *, 4> SpeculativeLoads;
+
/// BFI and PSI are used to check for profile guided size optimizations.
BlockFrequencyInfo *BFI;
ProfileSummaryInfo *PSI;
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 9a2c9ba63ec7e..0c4ccde68b718 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -855,16 +855,19 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
return isPointerAlwaysReplaceable(From, To, DL);
}
-bool llvm::isDereferenceableReadOnlyLoop(
+bool llvm::isReadOnlyLoopWithSafeOrSpeculativeLoads(
Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ SmallVectorImpl<LoadInst *> *SpeculativeLoads,
SmallVectorImpl<const SCEVPredicate *> *Predicates) {
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : *BB) {
if (auto *LI = dyn_cast<LoadInst>(&I)) {
if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
- return false;
- } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
+ SpeculativeLoads->push_back(LI);
+ } else if (I.mayReadFromMemory() || I.mayWriteToMemory() ||
+ I.mayThrow()) {
return false;
+ }
}
}
return true;
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 323ab8b1ddad1..e33800a3b94d6 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -531,6 +531,11 @@ bool TargetTransformInfo::isLegalStridedLoadStore(Type *DataType,
return TTIImpl->isLegalStridedLoadStore(DataType, Alignment);
}
+bool TargetTransformInfo::isLegalSpeculativeLoad(Type *DataType,
+ Align Alignment) const {
+ return TTIImpl->isLegalSpeculativeLoad(DataType, Alignment);
+}
+
bool TargetTransformInfo::isLegalInterleavedAccessType(
VectorType *VTy, unsigned Factor, Align Alignment,
unsigned AddrSpace) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4a1db80076530..ea1d89867809e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24525,6 +24525,18 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
return true;
}
+bool RISCVTargetLowering::isLegalSpeculativeLoad(EVT DataType,
+ Align Alignment) const {
+ if (!Subtarget.hasVInstructions())
+ return false;
+
+ EVT ScalarType = DataType.getScalarType();
+ if (!isLegalElementTypeForRVV(ScalarType))
+ return false;
+
+ return true;
+}
+
MachineInstr *
RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
MachineBasicBlock::instr_iterator &MBBI,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index fb63ebcfaacea..071eb86ed35a6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -425,6 +425,10 @@ class RISCVTargetLowering : public TargetLowering {
/// alignment is legal.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;
+ /// Return true if a speculative load of the given result type and
+ /// alignment is legal.
+ bool isLegalSpeculativeLoad(EVT DataType, Align Alignment) const;
+
unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
bool fallBackToDAGISel(const Instruction &Inst) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index b632f25b963f7..b9cac933f8f26 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -332,6 +332,11 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
return TLI->isLegalStridedLoadStore(DataTypeVT, Alignment);
}
+ bool isLegalSpeculativeLoad(Type *DataType, Align Alignment) const override {
+ EVT DataTypeVT = TLI->getValueType(DL, DataType);
+ return TLI->isLegalSpeculativeLoad(DataTypeVT, Alignment);
+ }
+
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
Align Alignment,
unsigned AddrSpace) const override {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 789047a2a28e7..eef37811cc0d6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1776,16 +1776,39 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
"Expected latch predecessor to be the early exiting block");
- // TODO: Handle loops that may fault.
Predicates.clear();
- if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
- &Predicates)) {
+ SmallVector<LoadInst *, 4> NonDerefLoads;
+ if (!isReadOnlyLoopWithSafeOrSpeculativeLoads(TheLoop, PSE.getSE(), DT, AC,
+ &NonDerefLoads, &Predicates)) {
reportVectorizationFailure(
"Loop may fault",
"Cannot vectorize potentially faulting early exit loop",
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
return false;
}
+ // Check whether the non-dereferenceable loads, if any, can be speculated.
+ for (LoadInst *LI : NonDerefLoads) {
+ // Only support unit-stride access for now.
+ int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
+ if (Stride != 1) {
+ reportVectorizationFailure("Loop contains strided unbound access",
+ "Cannot vectorize early exit loop with "
+ "speculative strided load",
+ "SpeculativeNonUnitStrideLoadEarlyExitLoop",
+ ORE, TheLoop);
+ return false;
+ }
+ if (!TTI->isLegalSpeculativeLoad(LI->getType(), LI->getAlign())) {
+ reportVectorizationFailure("Loop may fault",
+ "Cannot vectorize early exit loop with "
+ "illegal speculative load",
+ "IllegalSpeculativeLoadEarlyExitLoop", ORE,
+ TheLoop);
+ return false;
+ }
+ SpeculativeLoads.insert(LI);
+ LLVM_DEBUG(dbgs() << "LV: Found speculative load: " << *LI << "\n");
+ }
[[maybe_unused]] const SCEV *SymbolicMaxBTC =
PSE.getSymbolicMaxBackedgeTakenCount();
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 70f884016d08c..174791d432a35 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9985,6 +9985,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
+ if (!LVL.getSpeculativeLoads().empty()) {
+ reportVectorizationFailure("Auto-vectorization of loops with speculative "
+ "load is not supported",
+ "SpeculativeLoadsNotSupported", ORE, L);
+ return false;
+ }
+
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
// even evaluating whether vectorization is profitable. Since we cannot modify
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll b/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll
new file mode 100644
index 0000000000000..07e64784da84b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll
@@ -0,0 +1,84 @@
+; REQUIRES: asserts
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s
+
+define ptr @unsupported_data_type(ptr %first, ptr %last, i128 %value) {
+; CHECK-LABEL: LV: Checking a loop in 'unsupported_data_type'
+; CHECK: LV: Not vectorizing: Loop may fault.
+entry:
+ %cond = icmp eq ptr %first, %last
+ br i1 %cond, label %return, label %for.body
+
+for.body:
+ %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
+ %1 = load i128, ptr %first.addr, align 4
+ %cond2 = icmp eq i128 %1, %value
+ br i1 %cond2, label %for.end, label %for.inc
+
+for.inc:
+ %first.next = getelementptr inbounds i128, ptr %first.addr, i64 1
+ %cond3 = icmp eq ptr %first.next, %last
+ br i1 %cond3, label %for.end, label %for.body
+
+for.end:
+ %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
+ br label %return
+
+return:
+ %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
+ ret ptr %retval
+}
+
+define ptr @unbound_strided_access(ptr %first, ptr %last, i32 %value) {
+; CHECK-LABEL: LV: Checking a loop in 'unbound_strided_access'
+; CHECK: LV: Not vectorizing: Loop contains strided unbound access.
+entry:
+ %cond = icmp eq ptr %first, %last
+ br i1 %cond, label %return, label %for.body
+
+for.body:
+ %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
+ %1 = load i32, ptr %first.addr, align 4
+ %cond2 = icmp eq i32 %1, %value
+ br i1 %cond2, label %for.end, label %for.inc
+
+for.inc:
+ %first.next = getelementptr inbounds i32, ptr %first.addr, i64 2
+ %cond3 = icmp eq ptr %first.next, %last
+ br i1 %cond3, label %for.end, label %for.body
+
+for.end:
+ %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
+ br label %return
+
+return:
+ %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
+ ret ptr %retval
+}
+
+define ptr @single_unbound_access(ptr %first, ptr %last, i32 %value) {
+; CHECK-LABEL: LV: Checking a loop in 'single_unbound_access'
+; CHECK: LV: We can vectorize this loop!
+; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with speculative load is not supported.
+entry:
+ %cond = icmp eq ptr %first, %last
+ br i1 %cond, label %return, label %for.body
+
+for.body:
+ %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
+ %1 = load i32, ptr %first.addr, align 4
+ %cond2 = icmp eq i32 %1, %value
+ br i1 %cond2, label %for.end, label %for.inc
+
+for.inc:
+ %first.next = getelementptr inbounds i32, ptr %first.addr, i64 1
+ %cond3 = icmp eq ptr %first.next, %last
+ br i1 %cond3, label %for.end, label %for.body
+
+for.end:
+ %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
+ br label %return
+
+return:
+ %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
+ ret ptr %retval
+}
diff --git a/llvm/unittests/Analysis/LoadsTest.cpp b/llvm/unittests/Analysis/LoadsTest.cpp
index c4f5b22318e34..fab2aeb745ad0 100644
--- a/llvm/unittests/Analysis/LoadsTest.cpp
+++ b/llvm/unittests/Analysis/LoadsTest.cpp
@@ -195,7 +195,10 @@ loop.end:
assert(Header->getName() == "loop");
Loop *L = LI.getLoopFor(Header);
- return isDereferenceableReadOnlyLoop(L, &SE, &DT, &AC);
+ SmallVector<LoadInst *, 4> NonDerefLoads;
+ return isReadOnlyLoopWithSafeOrSpeculativeLoads(L, &SE, &DT, &AC,
+ &NonDerefLoads) &&
+ NonDerefLoads.empty();
};
ASSERT_TRUE(IsDerefReadOnlyLoop(F1));
From 14e199f9e40019763f2e82ab7eda29b9f3fbd083 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Fri, 25 Jul 2025 16:24:16 -0700
Subject: [PATCH 2/3] Enable early-exit loops with EVL
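
With EVL tail folding, the latch of the vectorized loop must account for both
exits: the uncountable early-exit condition is OR'd with the main trip-count
exit before branching out of the vector body. A rough sketch of the expected
latch sequence, using illustrative value names taken from the new test below:

  %any.early = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> %early.mask)
  %main.exit = icmp eq i64 %index.evl.next, %n
  %exit.cond = or i1 %any.early, %main.exit
  br i1 %exit.cond, label %middle.split, label %vector.body
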
---
.../Vectorize/LoopVectorizationLegality.cpp | 10 ++-
.../Transforms/Vectorize/LoopVectorize.cpp | 5 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 25 ++++++
...ectorize-force-tail-with-evl-early-exit.ll | 86 +++++++++++++++++++
4 files changed, 118 insertions(+), 8 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-early-exit.ll
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index eef37811cc0d6..39d0cd3c4a452 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1943,10 +1943,12 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
bool LoopVectorizationLegality::canFoldTailByMasking() const {
// The only loops we can vectorize without a scalar epilogue, are loops with
- // a bottom-test and a single exiting block. We'd have to handle the fact
- // that not every instruction executes on the last iteration. This will
- // require a lane mask which varies through the vector loop body. (TODO)
- if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
+ // a bottom-test and a single exiting block or those with early exits. We'd
+ // have to handle the fact that not every instruction executes on the last
+ // iteration. This will require a lane mask which varies through the vector
+ // loop body. (TODO)
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch() &&
+ !hasUncountableEarlyExit()) {
LLVM_DEBUG(
dbgs()
<< "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 174791d432a35..4ace46ed8e6f5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8668,10 +8668,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// count is >= increment and a multiple of the increment.
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
if (!HasNUW) {
- auto *IVInc = Plan->getVectorLoopRegion()
- ->getExitingBasicBlock()
- ->getTerminator()
- ->getOperand(0);
+ auto *IVInc = Plan->getCanonicalIV()->getBackedgeValue();
assert(match(IVInc, m_VPInstruction<Instruction::Add>(
m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
"Did not find the canonical IV increment");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index cff43c2742a6b..aca189280a938 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2040,6 +2040,16 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
// Replace the original terminator with BranchOnCond. We have to invert the
// mask here because a true condition means jumping to the exit block.
auto *NotMask = Builder.createNot(ALM, DL);
+ using namespace VPlanPatternMatch;
+ if (VPValue *IsEarlyExitTaken = nullptr; match(
+ OriginalTerminator, m_BranchOnCond(m_BinaryOr(
+ m_VPValue(IsEarlyExitTaken), m_VPValue())))) {
+ auto *AnyExitTaken =
+ Builder.createNaryOp(Instruction::Or, {IsEarlyExitTaken, NotMask});
+ OriginalTerminator->setOperand(0, AnyExitTaken);
+ return LaneMaskPhi;
+ }
+
Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL);
OriginalTerminator->eraseFromParent();
return LaneMaskPhi;
@@ -2480,6 +2490,21 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
// Skip single-iteration loop region
if (match(LatchExitingBr, m_BranchOnCond(m_True())))
return;
+
+ // In loops with early exits, replace VectorTripCount with TripCount in the latch exit compare.
+ if (VPValue *VPMainExitCond = nullptr;
+ match(LatchExitingBr, m_BranchOnCond(m_BinaryOr(
+ m_VPValue(), m_VPValue(VPMainExitCond)))) &&
+ match(VPMainExitCond, m_VPInstruction<Instruction::ICmp>(
+ m_Specific(EVLIncrement),
+ m_Specific(&Plan.getVectorTripCount())))) {
+ // Expected pattern here is:
+ // EMIT vp<%main.exit.cond> = icmp eq vp<%evl.next>, vp<%vtc>
+ // EMIT vp<%exit.cond> = or vp<%alt.exit.cond>, vp<%main.exit.cond>
+ // EMIT branch-on-cond vp<%exit.cond>
+ VPMainExitCond->getDefiningRecipe()->setOperand(1, Plan.getTripCount());
+ return;
+ }
assert(LatchExitingBr &&
match(LatchExitingBr,
m_BranchOnCount(m_VPValue(EVLIncrement),
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-early-exit.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-early-exit.ll
new file mode 100644
index 0000000000000..f126771449d5d
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-early-exit.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -S -enable-early-exit-vectorization %s | FileCheck %s
+
+declare void @init(ptr)
+
+define i64 @multi_exiting_to_different_exits_live_in_exit_values() {
+; CHECK-LABEL: define i64 @multi_exiting_to_different_exits_live_in_exit_values(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SRC:%.*]] = alloca [128 x i32], align 4
+; CHECK-NEXT: call void @init(ptr [[SRC]])
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 128, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[EVL_BASED_IV]]
+; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[GEP_SRC]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 10)
+; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP5]] to i64
+; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP12]])
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 128
+; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
+; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_SPLIT]]:
+; CHECK-NEXT: br i1 [[TMP17]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[E2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT: br label %[[E1:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV1]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC1]], align 4
+; CHECK-NEXT: [[C_1:%.*]] = icmp eq i32 [[L]], 10
+; CHECK-NEXT: br i1 [[C_1]], label %[[E1]], label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[INC]] = add nuw i64 [[IV1]], 1
+; CHECK-NEXT: [[C_2:%.*]] = icmp eq i64 [[INC]], 128
+; CHECK-NEXT: br i1 [[C_2]], label %[[E2]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[E1]]:
+; CHECK-NEXT: [[P1:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: ret i64 [[P1]]
+; CHECK: [[E2]]:
+; CHECK-NEXT: [[P2:%.*]] = phi i64 [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i64 [[P2]]
+;
+entry:
+ %src = alloca [128 x i32]
+ call void @init(ptr %src)
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ]
+ %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+ %l = load i32, ptr %gep.src
+ %c.1 = icmp eq i32 %l, 10
+ br i1 %c.1, label %e1, label %loop.latch
+
+loop.latch:
+ %inc = add nuw i64 %iv, 1
+ %c.2 = icmp eq i64 %inc, 128
+ br i1 %c.2, label %e2, label %loop.header
+
+e1:
+ %p1 = phi i64 [ 0, %loop.header ]
+ ret i64 %p1
+
+e2:
+ %p2 = phi i64 [ 1, %loop.latch ]
+ ret i64 %p2
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"}
+; CHECK: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META3]], [[META1]]}
+;.
From 1bb79f8d061cc34f1c1c8d60f89264729c9440a9 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Wed, 12 Mar 2025 00:14:28 -0700
Subject: [PATCH 3/3] Add WidenFFLoad
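
The speculative load is lowered to the fault-only-first VP load intrinsic,
which returns both the loaded vector and the number of lanes actually read;
that returned length then drives the EVL-based IV and AVL updates, so no lane
past a faulting element is ever consumed. A rough sketch with illustrative
names, mirroring the new find.ll test:

  %ff = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr align 4 %addr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  %data = extractvalue { <vscale x 4 x i32>, i32 } %ff, 0
  %new.vl = extractvalue { <vscale x 4 x i32>, i32 } %ff, 1
  %vl.zext = zext i32 %new.vl to i64
  %iv.next = add i64 %vl.zext, %iv
  %avl.next = sub nuw i64 %avl, %vl.zext
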
---
.../Transforms/Vectorize/LoopVectorize.cpp | 36 +++-
llvm/lib/Transforms/Vectorize/VPlan.h | 70 ++++++++
.../Transforms/Vectorize/VPlanAnalysis.cpp | 3 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 56 ++++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 69 ++++++--
llvm/lib/Transforms/Vectorize/VPlanValue.h | 4 +
.../Transforms/Vectorize/VPlanVerifier.cpp | 5 +-
.../Transforms/LoopVectorize/RISCV/find.ll | 161 ++++++++++++++++++
8 files changed, 386 insertions(+), 18 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/find.ll
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4ace46ed8e6f5..8b8b194aa9af0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -402,6 +402,10 @@ static cl::opt<bool> EnableEarlyExitVectorization(
cl::desc(
"Enable vectorization of early exit loops with uncountable exits."));
+static cl::opt<bool> EnableSpeculativeLoads(
+ "enable-speculative-load", cl::init(false), cl::Hidden,
+ cl::desc("Enable vectorization of loops with speculative loads."));
+
// Likelyhood of bypassing the vectorized loop because there are zero trips left
// after prolog. See `emitIterationCountCheck`.
static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
@@ -1365,6 +1369,9 @@ class LoopVectorizationCostModel {
if (ChosenTailFoldingStyle->first != TailFoldingStyle::DataWithEVL &&
ChosenTailFoldingStyle->second != TailFoldingStyle::DataWithEVL)
return;
+ // Do not override EVL styles for speculative loads.
+ if (!Legal->getSpeculativeLoads().empty())
+ return;
// Override EVL styles if needed.
// FIXME: Investigate opportunity for fixed vector factor.
bool EVLIsLegal = UserIC <= 1 && IsScalableVF &&
@@ -4164,6 +4171,8 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPWidenPointerInductionSC:
case VPDef::VPReductionPHISC:
case VPDef::VPInterleaveSC:
+ case VPDef::VPWidenFFLoadEVLSC:
+ case VPDef::VPWidenFFLoadSC:
case VPDef::VPWidenLoadEVLSC:
case VPDef::VPWidenLoadSC:
case VPDef::VPWidenStoreEVLSC:
@@ -7749,6 +7758,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
Builder.insert(VectorPtr);
Ptr = VectorPtr;
}
+ if (Legal->getSpeculativeLoads().contains(I)) {
+ auto *Load = dyn_cast<LoadInst>(I);
+ return new VPWidenFFLoadRecipe(*Load, Ptr, Mask, VPIRMetadata(*Load, LVer),
+ I->getDebugLoc());
+ }
+
if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
VPIRMetadata(*Load, LVer), I->getDebugLoc());
@@ -9982,7 +9997,26 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- if (!LVL.getSpeculativeLoads().empty()) {
+ if (EnableSpeculativeLoads) {
+ // VPWidenFFLoadEVLRecipe is currently the only concrete recipe that
+ // generates speculative load intrinsics. Since it relies on the EVL
+ // transform, speculative loads are only supported when tail-folding with
+ // EVL is enabled.
+ if (ForceTailFoldingStyle != TailFoldingStyle::DataWithEVL ||
+ PreferPredicateOverEpilogue !=
+ PreferPredicateTy::PredicateOrDontVectorize) {
+ reportVectorizationFailure("Auto-vectorization of loops with speculative "
+ "load is not enabled",
+ "SpeculativeLoadsDisabled", ORE, L);
+ return false;
+ }
+ if (LVL.getSpeculativeLoads().size() > 1) {
+ reportVectorizationFailure("Auto-vectorization of loops with more than 1 "
+ "speculative load is not enabled",
+ "MoreThanOneSpeculativeLoads", ORE, L);
+ return false;
+ }
+ } else if (!LVL.getSpeculativeLoads().empty()) {
reportVectorizationFailure("Auto-vectorization of loops with speculative "
"load is not supported",
"SpeculativeLoadsNotSupported", ORE, L);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 46e55be3f643b..6bacfb4ec7337 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -559,6 +559,8 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPBranchOnMaskSC:
case VPRecipeBase::VPInterleaveSC:
case VPRecipeBase::VPIRInstructionSC:
+ case VPRecipeBase::VPWidenFFLoadEVLSC:
+ case VPRecipeBase::VPWidenFFLoadSC:
case VPRecipeBase::VPWidenLoadEVLSC:
case VPRecipeBase::VPWidenLoadSC:
case VPRecipeBase::VPWidenStoreEVLSC:
@@ -3026,6 +3028,8 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenFFLoadSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenFFLoadEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
}
@@ -3107,6 +3111,72 @@ struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
}
};
+struct VPWidenFFLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
+ VPWidenFFLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
+ const VPIRMetadata &Metadata, DebugLoc DL)
+ : VPWidenMemoryRecipe(VPDef::VPWidenFFLoadSC, Load, {Addr},
+ /*Consecutive*/ true, /*Reverse*/ false, Metadata,
+ DL),
+ VPValue(this, &Load) {
+ setMask(Mask);
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenFFLoadSC);
+
+ void execute(VPTransformState &State) override {
+ llvm_unreachable("cannot execute this recipe, should be replaced by "
+ "VPWidenFFLoadEVLRecipe");
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getAddr();
+ }
+};
+
+struct VPWidenFFLoadEVLRecipe final : public VPWidenMemoryRecipe,
+ public VPValue {
+ VPWidenFFLoadEVLRecipe(VPWidenFFLoadRecipe &L, VPValue &EVL, VPValue *Mask)
+ : VPWidenMemoryRecipe(VPDef::VPWidenFFLoadEVLSC, L.getIngredient(),
+ {L.getAddr(), &EVL}, true, false, L,
+ L.getDebugLoc()),
+ VPValue(this, &getIngredient()) {
+ new VPValue(nullptr, this); // Second defined value: the number of lanes actually loaded (new VL).
+ setMask(Mask);
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenFFLoadEVLSC);
+
+ /// Return the EVL operand.
+ VPValue *getEVL() const { return getOperand(1); }
+
+ /// Generate a fault-only-first load using the llvm.vp.load.ff intrinsic.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // Widened, consecutive load operations only demand the first lane of
+ // their address.
+ return Op == getEVL() || Op == getAddr();
+ }
+};
+
/// A recipe for widening load operations with vector-predication intrinsics,
/// using the address to load from, the explicit vector length and an optional
/// mask.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index b39231f106300..a5502aff92ac1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -187,7 +187,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
}
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
- assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
+ assert((isa<VPWidenLoadRecipe, VPWidenFFLoadRecipe, VPWidenFFLoadEVLRecipe,
+ VPWidenLoadEVLRecipe>(R)) &&
"Store recipes should not define any values");
return cast<LoadInst>(&R->getIngredient())->getType();
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 7ca9b230f5aae..238583cc4ade2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -84,6 +84,8 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenIntOrFpInductionSC:
case VPWidenLoadEVLSC:
case VPWidenLoadSC:
+ case VPWidenFFLoadEVLSC:
+ case VPWidenFFLoadSC:
case VPWidenPHISC:
case VPWidenSC:
case VPWidenSelectSC: {
@@ -107,6 +109,8 @@ bool VPRecipeBase::mayReadFromMemory() const {
return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
case VPWidenLoadEVLSC:
case VPWidenLoadSC:
+ case VPWidenFFLoadEVLSC:
+ case VPWidenFFLoadSC:
return true;
case VPReplicateSC:
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
@@ -184,6 +188,9 @@ bool VPRecipeBase::mayHaveSideEffects() const {
"underlying instruction has side-effects");
return false;
}
+ case VPWidenFFLoadEVLSC:
+ case VPWidenFFLoadSC:
+ return true;
case VPInterleaveSC:
return mayWriteToMemory();
case VPWidenLoadEVLSC:
@@ -3224,6 +3231,55 @@ void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPWidenFFLoadEVLRecipe::execute(VPTransformState &State) {
+ Type *ScalarDataTy = getLoadStoreType(&Ingredient);
+ auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
+ const Align Alignment = getLoadStoreAlignment(&Ingredient);
+
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+
+ Value *EVL = State.get(getEVL(), VPLane(0));
+ Value *Addr = State.get(getAddr(), true);
+ Value *Mask = nullptr;
+ if (VPValue *VPMask = getMask())
+ Mask = State.get(VPMask);
+ else
+ Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
+ CallInst *NewLI =
+ Builder.CreateIntrinsic(Intrinsic::vp_load_ff, {DataTy, Addr->getType()},
+ {Addr, Mask, EVL}, nullptr, "vp.op.load.ff");
+ NewLI->addParamAttr(
+ 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
+ applyMetadata(*NewLI);
+ Value *V = cast<Instruction>(Builder.CreateExtractValue(NewLI, 0));
+ Value *VL = Builder.CreateExtractValue(NewLI, 1);
+ State.set(getVPValue(0), V);
+ State.set(getVPValue(1), VL, /*NeedsScalar=*/true);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPWidenFFLoadRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN ";
+ printAsOperand(O, SlotTracker);
+ O << " = fault-only-first-load ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPWidenFFLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN ";
+ printAsOperand(O, SlotTracker);
+ O << ", ";
+ getVPValue(1)->printAsOperand(O, SlotTracker);
+ O << " = vp.load.ff ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
/// Use all-true mask for reverse rather than actual mask, as it avoids a
/// dependence w/o affecting the result.
static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index aca189280a938..11f2e6a5d4307 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2184,6 +2184,10 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
VPValue *NewAddr = GetNewAddr(L->getAddr());
return new VPWidenLoadEVLRecipe(*L, NewAddr, EVL, NewMask);
})
+ .Case<VPWidenFFLoadRecipe>([&](VPWidenFFLoadRecipe *L) {
+ VPValue *NewMask = GetNewMask(L->getMask());
+ return new VPWidenFFLoadEVLRecipe(*L, EVL, NewMask);
+ })
.Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
VPValue *NewMask = GetNewMask(S->getMask());
VPValue *NewAddr = GetNewAddr(S->getAddr());
@@ -2212,7 +2216,7 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
}
/// Replace recipes with their EVL variants.
-static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
+static VPValue *transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
VPTypeAnalysis TypeInfo(Plan);
VPValue *AllOneMask = Plan.getTrue();
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
@@ -2222,9 +2226,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
VPWidenIntOrFpInductionRecipe>) &&
"User of VF that we can't transform to EVL.");
- Plan.getVF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) {
- return isa<VPWidenIntOrFpInductionRecipe, VPScalarIVStepsRecipe>(U);
- });
assert(all_of(Plan.getVFxUF().users(),
[&Plan](VPUser *U) {
@@ -2247,6 +2248,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
// Create a scalar phi to track the previous EVL if fixed-order recurrence is
// contained.
+ VPInstruction *PrevEVL = nullptr;
bool ContainsFORs =
any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
if (ContainsFORs) {
@@ -2259,8 +2261,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
TypeInfo.inferScalarType(MaxEVL), DebugLoc());
Builder.setInsertPoint(Header, Header->getFirstNonPhi());
- VPValue *PrevEVL =
- Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc(), "prev.evl");
+ // Postpone backedge insertion until LastEVL is known.
+ PrevEVL = Builder.createScalarPhi({MaxEVL}, DebugLoc(), "prev.evl");
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
@@ -2285,7 +2287,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
VPValue *HeaderMask = findHeaderMask(Plan);
if (!HeaderMask)
- return;
+ return &EVL;
// Replace header masks with a mask equivalent to predicating by EVL:
//
@@ -2313,15 +2315,21 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
continue;
[[maybe_unused]] unsigned NumDefVal = EVLRecipe->getNumDefinedValues();
- assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
- "New recipe must define the same number of values as the "
- "original.");
- assert(NumDefVal <= 1 &&
- "Only supports recipes with a single definition or without users.");
+ if (!isa<VPWidenFFLoadEVLRecipe>(EVLRecipe)) {
+ assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
+ "New recipe must define the same number of values as the "
+ "original.");
+ assert(
+ NumDefVal <= 1 &&
+ "Only supports recipes with a single definition or without users.");
+ }
EVLRecipe->insertBefore(CurRecipe);
if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(EVLRecipe)) {
VPValue *CurVPV = CurRecipe->getVPSingleValue();
CurVPV->replaceAllUsesWith(EVLRecipe->getVPSingleValue());
+ } else if (isa<VPWidenFFLoadEVLRecipe>(EVLRecipe)) {
+ VPValue *CurVPV = CurRecipe->getVPSingleValue();
+ CurVPV->replaceAllUsesWith(EVLRecipe->getVPValue(0));
}
ToErase.push_back(CurRecipe);
}
@@ -2329,12 +2337,44 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
if (EVLMask->getNumUsers() == 0)
ToErase.push_back(EVLMask->getDefiningRecipe());
+ // Find LastEVL
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan.getEntry());
+ VPValue *LastEVL = nullptr;
+
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+ for (VPRecipeBase &CurRecipe : *VPBB) {
+ auto *VPI = dyn_cast<VPInstruction>(&CurRecipe);
+ if (VPI && (VPI->getOpcode() == VPInstruction::ExplicitVectorLength)) {
+ assert((LastEVL == nullptr) && "EVL should be set only once");
+ LastEVL = VPI;
+ continue;
+ }
+ if (!LastEVL)
+ continue;
+ if (isa<VPWidenFFLoadEVLRecipe>(CurRecipe)) {
+ LastEVL = CurRecipe.getVPValue(1);
+ }
+ }
+ }
+
+ Plan.getVF().replaceUsesWithIf(LastEVL, [](VPUser &U, unsigned Idx) {
+ return isa<VPWidenIntOrFpInductionRecipe>(U);
+ });
+ Plan.getVF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) {
+ return isa<VPScalarIVStepsRecipe>(U);
+ });
+
+ if (PrevEVL)
+ PrevEVL->addOperand(LastEVL);
+
for (VPRecipeBase *R : reverse(ToErase)) {
SmallVector<VPValue *> PossiblyDead(R->operands());
R->eraseFromParent();
for (VPValue *Op : PossiblyDead)
recursivelyDeleteDeadRecipes(Op);
}
+ return LastEVL;
}
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
@@ -2408,10 +2448,11 @@ void VPlanTransforms::addExplicitVectorLength(
auto *VPEVL = Builder.createNaryOp(VPInstruction::ExplicitVectorLength, AVL,
DebugLoc());
+ VPValue *OpVPEVL = transformRecipestoEVLRecipes(Plan, *VPEVL);
+
auto *CanonicalIVIncrement =
cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
Builder.setInsertPoint(CanonicalIVIncrement);
- VPValue *OpVPEVL = VPEVL;
auto *I32Ty = Type::getInt32Ty(Plan.getContext());
OpVPEVL = Builder.createScalarZExtOrTrunc(
@@ -2429,8 +2470,6 @@ void VPlanTransforms::addExplicitVectorLength(
DebugLoc::getCompilerGenerated(), "avl.next");
AVLPhi->addOperand(NextAVL);
- transformRecipestoEVLRecipes(Plan, *VPEVL);
-
// Replace all uses of VPCanonicalIVPHIRecipe by
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 24f6d61512ef6..179e557731ce9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -40,6 +40,7 @@ class VPUser;
class VPRecipeBase;
class VPInterleaveRecipe;
class VPPhiAccessors;
+class VPWidenFFLoadEVLRecipe;
// This is the base class of the VPlan Def/Use graph, used for modeling the data
// flow into, within and out of the VPlan. VPValues can stand for live-ins
@@ -51,6 +52,7 @@ class LLVM_ABI_FOR_TEST VPValue {
friend class VPInterleaveRecipe;
friend class VPlan;
friend class VPExpressionRecipe;
+ friend class VPWidenFFLoadEVLRecipe;
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
@@ -348,6 +350,8 @@ class VPDef {
VPWidenCastSC,
VPWidenGEPSC,
VPWidenIntrinsicSC,
+ VPWidenFFLoadSC,
+ VPWidenFFLoadEVLSC,
VPWidenLoadEVLSC,
VPWidenLoadSC,
VPWidenStoreEVLSC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index e25ffe135418e..49283c2a44605 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -166,7 +166,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
}
return VerifyEVLUse(*R, 2);
})
- .Case<VPWidenLoadEVLRecipe, VPVectorEndPointerRecipe>(
+ .Case<VPWidenLoadEVLRecipe, VPWidenFFLoadEVLRecipe,
+ VPVectorEndPointerRecipe>(
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
.Case<VPInstructionWithType>(
[&](const VPInstructionWithType *S) { return VerifyEVLUse(*S, 0); })
@@ -175,6 +176,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
I->getOpcode() == Instruction::ICmp ||
I->getOpcode() == Instruction::Sub)
return VerifyEVLUse(*I, 1);
+ if (I->getOpcode() == Instruction::Sub)
+ return VerifyEVLUse(*I, 0);
switch (I->getOpcode()) {
case Instruction::Add:
break;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/find.ll b/llvm/test/Transforms/LoopVectorize/RISCV/find.ll
new file mode 100644
index 0000000000000..18f40da1ec710
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/find.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -enable-speculative-load -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s
+
+define ptr @find_with_liveout(ptr %first, ptr %last, ptr %value) {
+; CHECK-LABEL: define ptr @find_with_liveout(
+; CHECK-SAME: ptr [[FIRST:%.*]], ptr [[LAST:%.*]], ptr [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP_NOT6:%.*]] = icmp eq ptr [[FIRST]], [[LAST]]
+; CHECK-NEXT: br i1 [[CMP_NOT6]], label %[[RETURN:.*]], label %[[FOR_BODY_LR_PH:.*]]
+; CHECK: [[FOR_BODY_LR_PH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VALUE]], align 4
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[FIRST_ADDR_07:%.*]] = phi ptr [ [[FIRST]], %[[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], %[[FOR_INC:.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[FIRST_ADDR_07]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP1]], label %[[RETURN_LOOPEXIT:.*]], label %[[FOR_INC]]
+; CHECK: [[FOR_INC]]:
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[FIRST_ADDR_07]], i64 1
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[LAST]]
+; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[RETURN_LOOPEXIT]], label %[[FOR_BODY]]
+; CHECK: [[RETURN_LOOPEXIT]]:
+; CHECK-NEXT: [[RETVAL_0_PH:%.*]] = phi ptr [ [[FIRST_ADDR_07]], %[[FOR_BODY]] ], [ [[LAST]], %[[FOR_INC]] ]
+; CHECK-NEXT: br label %[[RETURN]]
+; CHECK: [[RETURN]]:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[RETVAL_0_PH]], %[[RETURN_LOOPEXIT]] ]
+; CHECK-NEXT: ret ptr [[RETVAL_0]]
+;
+entry:
+ %cmp.not6 = icmp eq ptr %first, %last
+ br i1 %cmp.not6, label %return, label %for.body.lr.ph
+
+for.body.lr.ph:
+ %0 = load i32, ptr %value, align 4
+ br label %for.body
+
+for.body:
+ %first.addr.07 = phi ptr [ %first, %for.body.lr.ph ], [ %incdec.ptr, %for.inc ]
+ %1 = load i32, ptr %first.addr.07, align 4
+ %cmp1 = icmp eq i32 %1, %0
+ br i1 %cmp1, label %return.loopexit, label %for.inc
+
+for.inc:
+ %incdec.ptr = getelementptr inbounds i32, ptr %first.addr.07, i64 1
+ %cmp.not = icmp eq ptr %incdec.ptr, %last
+ br i1 %cmp.not, label %return.loopexit, label %for.body
+
+return.loopexit:
+ %retval.0.ph = phi ptr [ %first.addr.07, %for.body ], [ %last, %for.inc ]
+ br label %return
+
+return:
+ %retval.0 = phi ptr [ %first, %entry ], [ %retval.0.ph, %return.loopexit ]
+ ret ptr %retval.0
+}
+
+define i32 @find_without_liveout(ptr %first, ptr %last, ptr %value) {
+; CHECK-LABEL: define i32 @find_without_liveout(
+; CHECK-SAME: ptr [[FIRST:%.*]], ptr [[LAST:%.*]], ptr [[VALUE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[FIRST4:%.*]] = ptrtoint ptr [[FIRST]] to i64
+; CHECK-NEXT: [[LAST3:%.*]] = ptrtoint ptr [[LAST]] to i64
+; CHECK-NEXT: [[FIRST2:%.*]] = ptrtoint ptr [[FIRST]] to i64
+; CHECK-NEXT: [[LAST1:%.*]] = ptrtoint ptr [[LAST]] to i64
+; CHECK-NEXT: [[CMP_NOT6:%.*]] = icmp eq ptr [[FIRST]], [[LAST]]
+; CHECK-NEXT: br i1 [[CMP_NOT6]], label %[[RETURN:.*]], label %[[FOR_BODY_LR_PH:.*]]
+; CHECK: [[FOR_BODY_LR_PH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VALUE]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LAST3]], -4
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[FIRST4]]
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK: [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[LAST1]] to i2
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[FIRST2]] to i2
+; CHECK-NEXT: [[TMP7:%.*]] = sub i2 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i2 [[TMP7]] to i64
+; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP4]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 4
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[VP_OP_LOAD_FF:%.*]] = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr align 4 [[NEXT_GEP]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
+; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x i32>, i32 } [[VP_OP_LOAD_FF]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x i32>, i32 } [[VP_OP_LOAD_FF]], 1
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq <vscale x 4 x i32> [[TMP16]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP17]] to i64
+; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
+; CHECK-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP18]])
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP4]]
+; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_SPLIT]]:
+; CHECK-NEXT: br i1 [[TMP20]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[RETURN_LOOPEXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT: br label %[[RETURN_LOOPEXIT]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[FIRST]], %[[FOR_BODY_LR_PH]] ], [ [[FIRST]], %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[FIRST_ADDR_07:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], %[[FOR_INC:.*]] ]
+; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[FIRST_ADDR_07]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP23]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP1]], label %[[RETURN_LOOPEXIT]], label %[[FOR_INC]]
+; CHECK: [[FOR_INC]]:
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[FIRST_ADDR_07]], i64 1
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[LAST]]
+; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[RETURN_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[RETURN_LOOPEXIT]]:
+; CHECK-NEXT: [[RETVAL_0_PH:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ 1, %[[FOR_INC]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: br label %[[RETURN]]
+; CHECK: [[RETURN]]:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RETVAL_0_PH]], %[[RETURN_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ %cmp.not6 = icmp eq ptr %first, %last
+ br i1 %cmp.not6, label %return, label %for.body.lr.ph
+
+for.body.lr.ph:
+ %0 = load i32, ptr %value, align 4
+ br label %for.body
+
+for.body:
+ %first.addr.07 = phi ptr [ %first, %for.body.lr.ph ], [ %incdec.ptr, %for.inc ]
+ %1 = load i32, ptr %first.addr.07, align 4
+ %cmp1 = icmp eq i32 %1, %0
+ br i1 %cmp1, label %return.loopexit, label %for.inc
+
+for.inc:
+ %incdec.ptr = getelementptr inbounds i32, ptr %first.addr.07, i64 1
+ %cmp.not = icmp eq ptr %incdec.ptr, %last
+ br i1 %cmp.not, label %return.loopexit, label %for.body
+
+return.loopexit:
+ %retval.0.ph = phi i32 [ 0, %for.body ], [ 1, %for.inc ]
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ 0, %entry ], [ %retval.0.ph, %return.loopexit ]
+ ret i32 %retval.0
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"}
+; CHECK: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
+;.