[llvm] [SLP][NFC] Add option to bypass early profitability check. (PR #88594)

via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 12 16:33:01 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Valery Dmitriev (valerydmit)

<details>
<summary>Changes</summary>

The option intended primarily for LIT tests to suppress heuristic based profitability check and proceed vectorization of a seemingly unprofitable alternate operation pattern. This allows the vectorizer to execute path that was the original intent of a test.

---
Full diff: https://github.com/llvm/llvm-project/pull/88594.diff


5 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+6-1) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll (+19) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll (+16) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll (+23) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll (+22) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7b5b0efca8d3f0..82dcd569200f45 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -118,6 +118,11 @@ static cl::opt<int>
                      cl::desc("Only vectorize if you gain more than this "
                               "number "));
 
+static cl::opt<bool> SLPSkipEarlyProfitabilityCheck(
+    "slp-skip-early-profitability-check", cl::init(false), cl::Hidden,
+    cl::desc("When true, SLP vectorizer bypasses profitability checks based on "
+             "heuristics and makes vectorization decision via cost modeling."));
+
 static cl::opt<bool>
 ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
                    cl::desc("Attempt to vectorize horizontal reductions"));
@@ -6215,7 +6220,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
       LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
       return TreeEntry::NeedToGather;
     }
-    if (!areAltOperandsProfitable(S, VL)) {
+    if (!SLPSkipEarlyProfitabilityCheck && !areAltOperandsProfitable(S, VL)) {
       LLVM_DEBUG(
           dbgs()
           << "SLP: ShuffleVector not vectorized, operands are buildvector and "
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll
index b76e26e0fd5717..2ff6785c0da640 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll
@@ -1,7 +1,26 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999\
+; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
 
 define i64 @wombat() {
+; FORCED-LABEL: define i64 @wombat() {
+; FORCED-NEXT:  bb:
+; FORCED-NEXT:    br label [[BB2:%.*]]
+; FORCED:       bb1:
+; FORCED-NEXT:    br label [[BB2]]
+; FORCED:       bb2:
+; FORCED-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 0, [[BB1:%.*]] ]
+; FORCED-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[PHI]], i32 0
+; FORCED-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT:    [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i1>
+; FORCED-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
+; FORCED-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i64
+; FORCED-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
+; FORCED-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i64
+; FORCED-NEXT:    [[OR:%.*]] = or i64 [[TMP4]], [[TMP6]]
+; FORCED-NEXT:    ret i64 [[OR]]
+;
 ; CHECK-LABEL: define i64 @wombat() {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br label [[BB2:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
index 3c3dea3f1ea886..f2ea2df7cc982c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
@@ -1,7 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-100 -mtriple=x86_64-w64-windows-gnu < %s | FileCheck %s
+; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-100 -mtriple=x86_64-w64-windows-gnu\
+; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
 
 define void @test(i16 %0) {
+; FORCED-LABEL: @test(
+; FORCED-NEXT:  for.body92.preheader:
+; FORCED-NEXT:    [[TMP1:%.*]] = insertelement <2 x i16> <i16 0, i16 poison>, i16 [[TMP0:%.*]], i32 1
+; FORCED-NEXT:    [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32>
+; FORCED-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
+; FORCED-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
+; FORCED-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, <4 x i32> [[TMP5]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+; FORCED-NEXT:    br label [[FOR_BODY92:%.*]]
+; FORCED:       for.body92:
+; FORCED-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]]
+; FORCED-NEXT:    store <4 x i32> [[TMP7]], ptr undef, align 8
+; FORCED-NEXT:    br label [[FOR_BODY92]]
+;
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  for.body92.preheader:
 ; CHECK-NEXT:    br label [[FOR_BODY92:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
index 19a8aa9b618156..d7144d750321fe 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
@@ -1,7 +1,30 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999 < %s | FileCheck %s
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999\
+; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
 
 define i64 @foo() {
+; FORCED-LABEL: define i64 @foo() {
+; FORCED-NEXT:  bb:
+; FORCED-NEXT:    br label [[BB3:%.*]]
+; FORCED:       bb1:
+; FORCED-NEXT:    [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
+; FORCED-NEXT:    ret i64 0
+; FORCED:       bb3:
+; FORCED-NEXT:    [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
+; FORCED-NEXT:    [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
+; FORCED-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
+; FORCED-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
+; FORCED-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
+; FORCED-NEXT:    [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT:    [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
+; FORCED-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
+; FORCED-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]]
+; FORCED-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; FORCED-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
+; FORCED-NEXT:    br i1 false, label [[BB3]], label [[BB1:%.*]]
+;
 ; CHECK-LABEL: define i64 @foo() {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
index 59cd1c0ccddf8c..1163c8219dabe5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
@@ -1,7 +1,29 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu\
+; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
 
 define void @foo() {
+; FORCED-LABEL: define void @foo() {
+; FORCED-NEXT:  bb:
+; FORCED-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
+; FORCED-NEXT:    br label [[BB1:%.*]]
+; FORCED:       bb1:
+; FORCED-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
+; FORCED-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]]
+; FORCED-NEXT:    [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]]
+; FORCED-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT:    [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
+; FORCED-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
+; FORCED-NEXT:    [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
+; FORCED-NEXT:    br label [[BB4]]
+; FORCED:       bb4:
+; FORCED-NEXT:    br i1 false, label [[BB5:%.*]], label [[BB1]]
+; FORCED:       bb5:
+; FORCED-NEXT:    [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ]
+; FORCED-NEXT:    ret void
+;
 ; CHECK-LABEL: define void @foo() {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br label [[BB1:%.*]]

``````````

</details>


https://github.com/llvm/llvm-project/pull/88594


More information about the llvm-commits mailing list