[llvm] c28b7eb - [SLP] Fix handling of -slp-vectorize-hor-store for values with many uses.

Wed Nov 1 08:42:07 PDT 2023

Author: Alexey Bataev
Date: 2023-11-01T08:41:54-07:00
New Revision: c28b7eb4966741895d95e021652ab017ddc2ac73

URL: https://github.com/llvm/llvm-project/commit/c28b7eb4966741895d95e021652ab017ddc2ac73
DIFF: https://github.com/llvm/llvm-project/commit/c28b7eb4966741895d95e021652ab017ddc2ac73.diff

LOG: [SLP] Fix handling of -slp-vectorize-hor-store for values with many uses.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/horizontal-store-many-uses.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 88c72dde041e07c..5ea3b3e24524978 100644

--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15822,8 +15822,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
         // to investigate if we can safely turn on slp-vectorize-hor-store
         // instead to allow lookup for reduction chains in all non-vectorized
         // stores (need to check side effects and compile time).
-        TryToVectorizeRoot = (I == Stores.end() || I->second.size() == 1) &&
-                             SI->getValueOperand()->hasOneUse();
+        TryToVectorizeRoot |= (I == Stores.end() || I->second.size() == 1) &&
+                              SI->getValueOperand()->hasOneUse();
       }
       if (TryToVectorizeRoot) {
         for (auto *V : it->operand_values()) {

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-store-many-uses.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-store-many-uses.ll
index d93f1edfc5971e6..b94b4d99f58febe 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-store-many-uses.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-store-many-uses.ll
@@ -6,15 +6,10 @@ define void @test(ptr noalias %pl, ptr noalias %res, ptr noalias %p2) {
 ; CHECK-LABEL: define void @test(
 ; CHECK-SAME: ptr noalias [[PL:%.*]], ptr noalias [[RES:%.*]], ptr noalias [[P2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @arr_i32, align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr_i32, i64 0, i64 1), align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr_i32, i64 0, i64 2), align 8
-; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr_i32, i64 0, i64 3), align 4
-; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]]
-; CHECK-NEXT:    store i32 [[ADD_2]], ptr [[P2]], align 16
-; CHECK-NEXT:    store i32 [[ADD_2]], ptr [[RES]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @arr_i32, align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]])
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[P2]], align 16
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[RES]], align 16
 ; CHECK-NEXT:    ret void
 ;
 entry: