[llvm] ba8126b - [LV] Mark dead instructions in loop as free.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 24 01:32:00 PDT 2024
Author: Florian Hahn
Date: 2024-07-24T09:31:32+01:00
New Revision: ba8126b6fef79bd344a247f6291aaec7b67bdff0
URL: https://github.com/llvm/llvm-project/commit/ba8126b6fef79bd344a247f6291aaec7b67bdff0
DIFF: https://github.com/llvm/llvm-project/commit/ba8126b6fef79bd344a247f6291aaec7b67bdff0.diff
LOG: [LV] Mark dead instructions in loop as free.
Update collectValuesToIgnore to also ignore dead instructions in the
loop. Such instructions will be removed by VPlan-based DCE and won't be
considered by the VPlan-based cost model.
This closes a gap between the legacy and VPlan-based cost model. In
practice with the default pipelines, there shouldn't be any dead
instructions in loops reaching LoopVectorize, but it is easy to generate
such cases by hand or automatically via fuzzers.
Fixes https://github.com/llvm/llvm-project/issues/99701.
Added:
llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8fa1a57177d93..e4477f96aa624 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -137,6 +137,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
@@ -6681,6 +6682,7 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
SmallVector<Value *, 4> DeadInterleavePointerOps;
+ SmallVector<Value *, 4> DeadOps;
for (BasicBlock *BB : TheLoop->blocks())
for (Instruction &I : *BB) {
// Find all stores to invariant variables. Since they are going to sink
@@ -6690,6 +6692,17 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
ValuesToIgnore.insert(&I);
+ if (VecValuesToIgnore.contains(&I) || ValuesToIgnore.contains(&I))
+ continue;
+
+ // Add instructions that would be trivially dead and are only used by
+ // values already ignored to DeadOps to seed worklist.
+ if (wouldInstructionBeTriviallyDead(&I, TLI) &&
+ all_of(I.users(), [this](User *U) {
+ return VecValuesToIgnore.contains(U) || ValuesToIgnore.contains(U);
+ }))
+ DeadOps.push_back(&I);
+
// For interleave groups, we only create a pointer for the start of the
// interleave group. Queue up addresses of group members except the insert
// position for further processing.
@@ -6717,6 +6730,29 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
DeadInterleavePointerOps.append(Op->op_begin(), Op->op_end());
}
+ // Mark ops that would be trivially dead and are only used by ignored
+ // instructions as free.
+ for (unsigned I = 0; I != DeadOps.size(); ++I) {
+ auto *Op = dyn_cast<Instruction>(DeadOps[I]);
+ // Skip any op that shouldn't be considered dead.
+ if (!Op || !TheLoop->contains(Op) ||
+ !wouldInstructionBeTriviallyDead(Op, TLI) ||
+ any_of(Op->users(), [this](User *U) {
+ return !VecValuesToIgnore.contains(U) && !ValuesToIgnore.contains(U);
+ }))
+ continue;
+
+ // If all of Op's users are in ValuesToIgnore, add it to ValuesToIgnore
+ // which applies for both scalar and vector versions. Otherwise it is only
+ // dead in vector versions, so only add it to VecValuesToIgnore.
+ if (all_of(Op->users(),
+ [this](User *U) { return ValuesToIgnore.contains(U); }))
+ ValuesToIgnore.insert(Op);
+
+ VecValuesToIgnore.insert(Op);
+ DeadOps.append(Op->op_begin(), Op->op_end());
+ }
+
// Ignore type-promoting instructions we identified during reduction
// detection.
for (const auto &Reduction : Legal->getReductionVars()) {
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
new file mode 100644
index 0000000000000..9db4108de4b0a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+
+; Test with a dead load in the loop, from
+; https://github.com/llvm/llvm-project/issues/99701
+define void @dead_load(ptr %p, i16 %start) {
+; CHECK-LABEL: define void @dead_load(
+; CHECK-SAME: ptr [[P:%.*]], i16 [[START:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[START_EXT:%.*]] = sext i16 [[START]] to i64
+; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START_EXT]], i64 111)
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[SMAX]], [[START_EXT]]
+; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 1)
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[SMAX]], [[UMIN]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[START_EXT]]
+; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[UMIN]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP5]], [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP5]], [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 3
+; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START_EXT]], [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[START_EXT]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
+; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 8 x i64> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 8 x i64> [[TMP16]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8
+; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP19]]
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP20]], i64 0
+; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT1]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[P]], <vscale x 8 x i64> [[VEC_IND]]
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP21]], i32 2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT2]]
+; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START_EXT]], %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 3
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV]], 111
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %start.ext = sext i16 %start to i64
+ br label %loop
+
+loop:
+ %iv = phi i64 [ %start.ext, %entry ], [ %iv.next, %loop ]
+ %gep = getelementptr i16, ptr %p, i64 %iv
+ store i16 0, ptr %gep, align 2
+ %l = load i16, ptr %gep, align 2
+ %iv.next = add i64 %iv, 3
+ %cmp = icmp slt i64 %iv, 111
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
More information about the llvm-commits
mailing list