[llvm] [SLP] Delete vectorized users when tree contains an invalid cost (PR #86344)
Patrick O'Neill via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 22 14:33:54 PDT 2024
https://github.com/patrick-rivos created https://github.com/llvm/llvm-project/pull/86344
If a tree is partially vectorized and contains an unvectorizable op, the users of the partially vectorized subtree are neither marked for deletion nor cleaned up. With this patch, the cleanup code is triggered even when an invalid cost is encountered.
>From cd70ee07f2d077ed697897934b8120e9d1a9928b Mon Sep 17 00:00:00 2001
From: Patrick O'Neill <patrick at rivosinc.com>
Date: Fri, 22 Mar 2024 14:18:19 -0700
Subject: [PATCH] [SLP] Delete vectorized users when tree contains an invalid cost
If a tree is partially vectorized and contains an unvectorizable op, the
users of the partially vectorized subtree will not be marked for
deletion/cleaned up. With this patch the cleanup code is triggered
even when an invalid cost is encountered.
Signed-off-by: Patrick O'Neill <patrick at rivosinc.com>
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
.../RISCV/partial-vec-invalid-cost.ll | 57 +++++++++++++++++++
2 files changed, 58 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0f7afa2fc25c29..f98d15c285a693 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15959,7 +15959,7 @@ class HorizontalReduction {
LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost
<< " for reduction\n");
if (!Cost.isValid())
- return nullptr;
+ break;
if (Cost >= -SLPCostThreshold) {
V.getORE()->emit([&]() {
return OptimizationRemarkMissed(
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
new file mode 100644
index 00000000000000..31f16801b7a64e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s
+
+target triple = "riscv64-unknown-linux-gnu"
+
+define void @partial_vec_invalid_cost() #0 {
+; CHECK-LABEL: define void @partial_vec_invalid_cost(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LSHR_1:%.*]] = lshr i96 0, 0
+; CHECK-NEXT: [[LSHR_2:%.*]] = lshr i96 0, 0
+; CHECK-NEXT: [[TRUNC_I96_1:%.*]] = trunc i96 [[LSHR_1]] to i32
+; CHECK-NEXT: [[TRUNC_I96_2:%.*]] = trunc i96 [[LSHR_2]] to i32
+; CHECK-NEXT: [[TRUNC_I96_3:%.*]] = trunc i96 0 to i32
+; CHECK-NEXT: [[TRUNC_I96_4:%.*]] = trunc i96 0 to i32
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> zeroinitializer)
+; CHECK-NEXT: [[OP_RDX:%.*]] = or i32 [[TMP0]], [[TRUNC_I96_2]]
+; CHECK-NEXT: [[OP_RDX1:%.*]] = or i32 [[TRUNC_I96_1]], [[TRUNC_I96_3]]
+; CHECK-NEXT: [[OP_RDX2:%.*]] = or i32 [[OP_RDX]], [[OP_RDX1]]
+; CHECK-NEXT: [[OP_RDX3:%.*]] = or i32 [[OP_RDX2]], [[TRUNC_I96_4]]
+; CHECK-NEXT: [[STORE_THIS:%.*]] = zext i32 [[OP_RDX3]] to i96
+; CHECK-NEXT: store i96 [[STORE_THIS]], ptr null, align 16
+; CHECK-NEXT: ret void
+;
+entry:
+
+ %lshr.1 = lshr i96 0, 0 ; These ops
+ %lshr.2 = lshr i96 0, 0 ; return an
+ %add.0 = add i96 0, 0 ; invalid
+ %add.1 = add i96 0, 0 ; vector cost.
+
+ %trunc.i96.1 = trunc i96 %lshr.1 to i32 ; These ops
+ %trunc.i96.2 = trunc i96 %lshr.2 to i32 ; return an
+ %trunc.i96.3 = trunc i96 %add.0 to i32 ; invalid
+ %trunc.i96.4 = trunc i96 %add.1 to i32 ; vector cost.
+
+ %or.0 = or i32 %trunc.i96.1, %trunc.i96.2
+ %or.1 = or i32 %or.0, %trunc.i96.3
+ %or.2 = or i32 %or.1, %trunc.i96.4
+
+ %zext.0 = zext i1 0 to i32 ; These
+ %zext.1 = zext i1 0 to i32 ; ops
+ %zext.2 = zext i1 0 to i32 ; are
+ %zext.3 = zext i1 0 to i32 ; vectorized
+
+ %or.3 = or i32 %or.2, %zext.0 ; users
+ %or.4 = or i32 %or.3, %zext.1 ; of
+ %or.5 = or i32 %or.4, %zext.2 ; vectorized
+ %or.6 = or i32 %or.5, %zext.3 ; ops
+
+ %store.this = zext i32 %or.6 to i96
+
+ store i96 %store.this, ptr null, align 16
+ ret void
+}
+
+attributes #0 = { "target-features"="+v" }
More information about the llvm-commits mailing list