[llvm] [SLP] Delete vectorized users when tree contains an invalid cost (PR #86344)

Patrick O'Neill via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 22 14:33:54 PDT 2024


https://github.com/patrick-rivos created https://github.com/llvm/llvm-project/pull/86344

If a tree is partially vectorized and contains an unvectorizable op, the users of the partially vectorized subtree are neither marked for deletion nor cleaned up. With this patch, the cleanup code is triggered even when an invalid cost is encountered.

>From cd70ee07f2d077ed697897934b8120e9d1a9928b Mon Sep 17 00:00:00 2001
From: Patrick O'Neill <patrick at rivosinc.com>
Date: Fri, 22 Mar 2024 14:18:19 -0700
Subject: [PATCH] [SLP] Delete vectorized users when tree contains an invalid
 cost

If a tree is partially vectorized and contains an unvectorizable op, the
users of the partially vectorized subtree are neither marked for
deletion nor cleaned up. With this patch, the cleanup code is triggered
even when an invalid cost is encountered.

Signed-off-by: Patrick O'Neill <patrick at rivosinc.com>
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  2 +-
 .../RISCV/partial-vec-invalid-cost.ll         | 57 +++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0f7afa2fc25c29..f98d15c285a693 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15959,7 +15959,7 @@ class HorizontalReduction {
         LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost
                           << " for reduction\n");
         if (!Cost.isValid())
-          return nullptr;
+          break;
         if (Cost >= -SLPCostThreshold) {
           V.getORE()->emit([&]() {
             return OptimizationRemarkMissed(
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
new file mode 100644
index 00000000000000..31f16801b7a64e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s
+
+target triple = "riscv64-unknown-linux-gnu"
+
+define void @partial_vec_invalid_cost() #0 {
+; CHECK-LABEL: define void @partial_vec_invalid_cost(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LSHR_1:%.*]] = lshr i96 0, 0
+; CHECK-NEXT:    [[LSHR_2:%.*]] = lshr i96 0, 0
+; CHECK-NEXT:    [[TRUNC_I96_1:%.*]] = trunc i96 [[LSHR_1]] to i32
+; CHECK-NEXT:    [[TRUNC_I96_2:%.*]] = trunc i96 [[LSHR_2]] to i32
+; CHECK-NEXT:    [[TRUNC_I96_3:%.*]] = trunc i96 0 to i32
+; CHECK-NEXT:    [[TRUNC_I96_4:%.*]] = trunc i96 0 to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[OP_RDX:%.*]] = or i32 [[TMP0]], [[TRUNC_I96_2]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = or i32 [[TRUNC_I96_1]], [[TRUNC_I96_3]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = or i32 [[OP_RDX]], [[OP_RDX1]]
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = or i32 [[OP_RDX2]], [[TRUNC_I96_4]]
+; CHECK-NEXT:    [[STORE_THIS:%.*]] = zext i32 [[OP_RDX3]] to i96
+; CHECK-NEXT:    store i96 [[STORE_THIS]], ptr null, align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %lshr.1 = lshr i96 0, 0 ; These ops
+  %lshr.2 = lshr i96 0, 0 ; return an
+  %add.0 = add i96 0, 0   ; invalid
+  %add.1 = add i96 0, 0   ; vector cost.
+
+  %trunc.i96.1 = trunc i96 %lshr.1 to i32 ; These ops
+  %trunc.i96.2 = trunc i96 %lshr.2 to i32 ; return an
+  %trunc.i96.3 = trunc i96 %add.0 to i32  ; invalid
+  %trunc.i96.4 = trunc i96 %add.1 to i32  ; vector cost.
+
+  %or.0 = or i32 %trunc.i96.1, %trunc.i96.2
+  %or.1 = or i32 %or.0, %trunc.i96.3
+  %or.2 = or i32 %or.1, %trunc.i96.4
+
+  %zext.0 = zext i1 0 to i32 ; These
+  %zext.1 = zext i1 0 to i32 ; ops
+  %zext.2 = zext i1 0 to i32 ; are
+  %zext.3 = zext i1 0 to i32 ; vectorized
+
+  %or.3 = or i32 %or.2, %zext.0 ; users
+  %or.4 = or i32 %or.3, %zext.1 ; of
+  %or.5 = or i32 %or.4, %zext.2 ; vectorized
+  %or.6 = or i32 %or.5, %zext.3 ; ops
+
+  %store.this = zext i32 %or.6 to i96
+
+  store i96 %store.this, ptr null, align 16
+  ret void
+}
+
+attributes #0 = { "target-features"="+v" }



More information about the llvm-commits mailing list