[llvm] [VPlan] Introduce cannotHoistOrSinkRecipe (PR #162674)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 9 08:28:18 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-vectorizers
Author: Ramkumar Ramachandra (artagnon)
Changes:
Factor out common code to determine the legality of hoisting and sinking. At the additional use sites, any functional changes would amount to esoteric bugs being fixed.
-- 8< --
Based on https://github.com/llvm/llvm-project/pull/162636.
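
For orientation before reading the full diff: the heart of the change is the new predicate below, condensed from the VPlanTransforms.cpp hunk. It answers only the mechanical question of whether a recipe can be moved at all; aliasing and speculation safety remain the callers' concern. This is an excerpt from the patch, not standalone code.

```cpp
// Condensed from the diff: the common movability predicate factored out of
// sinkRecurrenceUsersAfterPrevious, hoistPreviousBeforeFORUsers, and licm.
static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {
  // Assumes don't alias anything or throw, so they are always movable.
  if (match(&R, m_Intrinsic<Intrinsic::assume>()))
    return false;

  // Conservatively pin reads, writes, and phis for now; the TODO in the
  // patch notes that reads whose location is not modified in the vector
  // loop could be relaxed later.
  if (R.mayReadOrWriteMemory() || R.isPhi())
    return true;

  // Allocas cannot be hoisted or sunk.
  auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
  return RepR && RepR->getOpcode() == Instruction::Alloca;
}
```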
---
Patch is 20.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162674.diff
8 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+9)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+4)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+35-21)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll (+8-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-model.ll (+2-8)
- (modified) llvm/test/Transforms/LoopVectorize/X86/funclet.ll (+25-4)
- (modified) llvm/test/Transforms/LoopVectorize/assume.ll (+10-10)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-assume.ll (+2-4)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fb696bea671af..6ac46e62cbaf5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1554,6 +1554,9 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
/// True if the intrinsic may have side-effects.
bool MayHaveSideEffects;
+ /// True if the intrinsic is safe to speculatively execute.
+ bool IsSafeToSpeculativelyExecute;
+
public:
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
ArrayRef<VPValue *> CallArguments, Type *Ty,
@@ -1577,6 +1580,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
MayHaveSideEffects = MayWriteToMemory ||
!Attrs.hasAttribute(Attribute::NoUnwind) ||
!Attrs.hasAttribute(Attribute::WillReturn);
+ IsSafeToSpeculativelyExecute = Attrs.hasAttribute(Attribute::Speculatable);
}
~VPWidenIntrinsicRecipe() override = default;
@@ -1616,6 +1620,11 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
/// Returns true if the intrinsic may have side-effects.
bool mayHaveSideEffects() const { return MayHaveSideEffects; }
+ /// Returns true if the intrinsic is safe to speculatively execute.
+ bool isSafeToSpeculativelyExecute() const {
+ return IsSafeToSpeculativelyExecute;
+ }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 600ff8ac33fea..8e05655afc277 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -70,6 +70,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
case VPCanonicalIVPHISC:
case VPBranchOnMaskSC:
+ case VPDerivedIVSC:
case VPFirstOrderRecurrencePHISC:
case VPReductionPHISC:
case VPScalarIVStepsSC:
@@ -86,6 +87,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenLoadEVLSC:
case VPWidenLoadSC:
case VPWidenPHISC:
+ case VPWidenPointerInductionSC:
case VPWidenSC:
case VPWidenSelectSC: {
const Instruction *I =
@@ -119,6 +121,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenIntrinsicSC:
return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
case VPBranchOnMaskSC:
+ case VPDerivedIVSC:
case VPFirstOrderRecurrencePHISC:
case VPPredInstPHISC:
case VPScalarIVStepsSC:
@@ -134,6 +137,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
+ case VPWidenPointerInductionSC:
case VPWidenSC:
case VPWidenSelectSC: {
const Instruction *I =
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c8a2d84a535d3..3ccefc6b4fa5d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -30,6 +30,7 @@
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
@@ -130,6 +131,24 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
return true;
}
+// Return true if we do not know how to (mechanically) hoist or sink a given
+// recipe out of a loop region. Does not address legality concerns such as
+// aliasing or speculation safety.
+static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {
+ // Assumes don't alias anything or throw.
+ if (match(&R, m_Intrinsic<Intrinsic::assume>()))
+ return false;
+
+ // TODO: Relax checks in the future, e.g. we could also hoist or sink reads,
+ // if their memory location is not modified in the vector loop.
+ if (R.mayReadOrWriteMemory() || R.isPhi())
+ return true;
+
+ // Allocas cannot be hoisted or sunk.
+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+ return RepR && RepR->getOpcode() == Instruction::Alloca;
+}
+
static bool sinkScalarOperands(VPlan &Plan) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
bool Changed = false;
@@ -1764,7 +1783,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
VPDT.properlyDominates(Previous, SinkCandidate))
return true;
- if (SinkCandidate->mayHaveSideEffects())
+ if (cannotHoistOrSinkRecipe(*SinkCandidate))
return false;
WorkList.push_back(SinkCandidate);
@@ -1804,7 +1823,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
VPRecipeBase *Previous,
VPDominatorTree &VPDT) {
- if (Previous->mayHaveSideEffects() || Previous->mayReadFromMemory())
+ if (cannotHoistOrSinkRecipe(*Previous))
return false;
// Collect recipes that need hoisting.
@@ -1851,11 +1870,6 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
return nullptr;
return HoistCandidate;
};
- auto CanHoist = [&](VPRecipeBase *HoistCandidate) {
- // Avoid hoisting candidates with side-effects, as we do not yet analyze
- // associated dependencies.
- return !HoistCandidate->mayHaveSideEffects();
- };
if (!NeedsHoisting(Previous->getVPSingleValue()))
return true;
@@ -1867,7 +1881,7 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
VPRecipeBase *Current = HoistCandidates[I];
assert(Current->getNumDefinedValues() == 1 &&
"only recipes with a single defined value expected");
- if (!CanHoist(Current))
+ if (cannotHoistOrSinkRecipe(*Current))
return false;
for (VPValue *Op : Current->operands()) {
@@ -2087,31 +2101,31 @@ void VPlanTransforms::cse(VPlan &Plan) {
}
}
+static bool isSafeToSpeculativelyExecute(VPRecipeBase *R) {
+ if (auto *WC = dyn_cast<VPWidenCallRecipe>(R))
+ return WC->getCalledScalarFunction()->isSpeculatable();
+ if (auto *WI = dyn_cast<VPWidenIntrinsicRecipe>(R))
+ return WI->isSafeToSpeculativelyExecute();
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(R))
+ return isSafeToSpeculativelyExecute(RepR->getUnderlyingInstr());
+ return !R->mayHaveSideEffects();
+}
+
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
static void licm(VPlan &Plan) {
VPBasicBlock *Preheader = Plan.getVectorPreheader();
- // Return true if we do not know how to (mechanically) hoist a given recipe
- // out of a loop region. Does not address legality concerns such as aliasing
- // or speculation safety.
- auto CannotHoistRecipe = [](VPRecipeBase &R) {
- // Allocas cannot be hoisted.
- auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- return RepR && RepR->getOpcode() == Instruction::Alloca;
- };
-
// Hoist any loop invariant recipes from the vector loop region to the
// preheader. Preform a shallow traversal of the vector loop region, to
// exclude recipes in replicate regions.
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ bool GuaranteedToExecute = Preheader->getSingleSuccessor() == LoopRegion;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- if (CannotHoistRecipe(R))
+ if (cannotHoistOrSinkRecipe(R))
continue;
- // TODO: Relax checks in the future, e.g. we could also hoist reads, if
- // their memory location is not modified in the vector loop.
- if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() ||
+ if ((!GuaranteedToExecute && !isSafeToSpeculativelyExecute(&R)) ||
any_of(R.operands(), [](VPValue *Op) {
return !Op->isDefinedOutsideLoopRegions();
}))
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
index 5970608794b55..48e64771ac152 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
@@ -45,8 +45,9 @@ define void @test1(ptr %dst, {i64, i64} %sv) {
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
-; FORCED-NEXT: br [[EXIT:label %.*]]
-; FORCED: [[SCALAR_PH:.*:]]
+; FORCED-NEXT: br label %[[EXIT:.*]]
+; FORCED: [[EXIT]]:
+; FORCED-NEXT: ret void
;
entry:
br label %loop.body
@@ -91,18 +92,19 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) {
; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { float, float } [[SV]], 1
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0
; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]])
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]]
-; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]])
; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
-; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
-; FORCED-NEXT: br [[EXIT:label %.*]]
-; FORCED: [[SCALAR_PH:.*:]]
+; FORCED-NEXT: br label %[[EXIT:.*]]
+; FORCED: [[EXIT]]:
+; FORCED-NEXT: ret void
;
entry:
br label %loop.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 9453ad7c61f68..725fa49c0930c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -540,6 +540,8 @@ define i64 @cost_assume(ptr %end, i64 %N) {
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i64 [[N:%.*]], 0
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -551,14 +553,6 @@ define i64 @cost_assume(ptr %end, i64 %N) {
; CHECK-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI2]], splat (i64 1)
; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI3]], splat (i64 1)
; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[VEC_PHI4]], splat (i64 1)
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll
index 84d21110e8da9..f5a30bef177df 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll
@@ -1,11 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i686-pc-windows-msvc18.0.0"
define void @test1() #0 personality ptr @__CxxFrameHandler3 {
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__CxxFrameHandler3 {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: invoke void @_CxxThrowException(ptr null, ptr null)
+; CHECK-NEXT: to label %[[UNREACHABLE:.*]] unwind label %[[CATCH_DISPATCH:.*]]
+; CHECK: [[CATCH_DISPATCH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller
+; CHECK: [[CATCH:.*]]:
+; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 64, ptr null]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP:.*]]:
+; CHECK-NEXT: catchret from [[TMP1]] to label %[[TRY_CONT:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, %[[CATCH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[CALL:%.*]] = call double @floor(double 1.000000e+00) #[[ATTR1:[0-9]+]] [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
+; CHECK: [[TRY_CONT]]:
+; CHECK-NEXT: ret void
+; CHECK: [[UNREACHABLE]]:
+; CHECK-NEXT: unreachable
+;
entry:
invoke void @_CxxThrowException(ptr null, ptr null)
- to label %unreachable unwind label %catch.dispatch
+ to label %unreachable unwind label %catch.dispatch
catch.dispatch: ; preds = %entry
%0 = catchswitch within none [label %catch] unwind to caller
@@ -31,9 +55,6 @@ unreachable: ; preds = %entry
unreachable
}
-; CHECK-LABEL: define void @test1(
-; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [ptr null, i32 64, ptr null]
-; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
declare x86_stdcallcc void @_CxxThrowException(ptr, ptr)
diff --git a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll
index 65c12a15406ff..224ec4a6f7a90 100644
--- a/llvm/test/Transforms/LoopVectorize/assume.ll
+++ b/llvm/test/Transforms/LoopVectorize/assume.ll
@@ -34,8 +34,9 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b)
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[FOR_END:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[FOR_END:.*]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret void
;
entry:
br label %for.body
@@ -73,29 +74,28 @@ define void @test2(ptr noalias %a, ptr noalias %b) {
; CHECK-NEXT: [[MASKCOND4:%.*]] = icmp eq i64 [[MASKEDPTR3]], 0
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2
; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP7]], align 4
; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[TMP8]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600
-; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[FOR_END:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[FOR_END:.*]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret void
;
entry:
%ptrint = ptrtoint ptr %a to i64
@@ -163,7 +163,7 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_COND_CLEANUP_LOOPEXIT:label %.*]], label %[[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
index ad8cd425cd6c2..667df3a6cc2a2 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
@@ -88,11 +88,11 @@ define void @test2(ptr %a, ptr noalias %b) {
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP7]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]]
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 1
@@ -101,8 +101,6 @@ define void @test2(ptr %a, ptr noalias %b) {
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 2 x float>, ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP15:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD3]], splat (float 1...
[truncated]
``````````
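A note tying the test updates back to the licm hunk above: hoisting out of the vector loop region is now gated on speculation safety unless the region is guaranteed to execute, as sketched below (condensed from the patch; not standalone). Loop-invariant llvm.assume calls and the speculatable @llvm.pow call therefore move from the vector body into the preheader, which is what the updated CHECK lines show.

```cpp
// Condensed from the licm hunk: a recipe is hoisted to the preheader only if
// it is mechanically movable, all of its operands are defined outside the
// loop region, and it is either guaranteed to execute or safe to speculate.
bool GuaranteedToExecute = Preheader->getSingleSuccessor() == LoopRegion;
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
  if (cannotHoistOrSinkRecipe(R))
    continue;
  if ((!GuaranteedToExecute && !isSafeToSpeculativelyExecute(&R)) ||
      any_of(R.operands(), [](VPValue *Op) {
        return !Op->isDefinedOutsideLoopRegions();
      }))
    continue;
  R.moveBefore(*Preheader, Preheader->end()); // hoist into the preheader
}
```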
https://github.com/llvm/llvm-project/pull/162674