[llvm] 9abb1ff - [SLP][NFC] Add option to bypass early profitability check. (#88594)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 15 09:09:43 PDT 2024
Author: Valery Dmitriev
Date: 2024-04-15T09:09:39-07:00
New Revision: 9abb1ffc5cb75465340cb604988d1e386415bd72
URL: https://github.com/llvm/llvm-project/commit/9abb1ffc5cb75465340cb604988d1e386415bd72
DIFF: https://github.com/llvm/llvm-project/commit/9abb1ffc5cb75465340cb604988d1e386415bd72.diff
LOG: [SLP][NFC] Add option to bypass early profitability check. (#88594)
The option is intended primarily for LIT tests: it suppresses the
heuristic-based profitability check and proceeds with vectorization of a
seemingly unprofitable alternate operation pattern. This allows the
vectorizer to execute the path that was the original intent of a test.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll
llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b031b40a978f5d..9a0eaca2f59806 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -118,6 +118,11 @@ static cl::opt<int>
cl::desc("Only vectorize if you gain more than this "
"number "));
+static cl::opt<bool> SLPSkipEarlyProfitabilityCheck(
+ "slp-skip-early-profitability-check", cl::init(false), cl::Hidden,
+ cl::desc("When true, SLP vectorizer bypasses profitability checks based on "
+ "heuristics and makes vectorization decision via cost modeling."));
+
static cl::opt<bool>
ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
cl::desc("Attempt to vectorize horizontal reductions"));
@@ -6250,7 +6255,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return TreeEntry::NeedToGather;
}
- if (!areAltOperandsProfitable(S, VL)) {
+ if (!SLPSkipEarlyProfitabilityCheck && !areAltOperandsProfitable(S, VL)) {
LLVM_DEBUG(
dbgs()
<< "SLP: ShuffleVector not vectorized, operands are buildvector and "
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll
index b76e26e0fd5717..2ff6785c0da640 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll
@@ -1,7 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999\
+; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
define i64 @wombat() {
+; FORCED-LABEL: define i64 @wombat() {
+; FORCED-NEXT: bb:
+; FORCED-NEXT: br label [[BB2:%.*]]
+; FORCED: bb1:
+; FORCED-NEXT: br label [[BB2]]
+; FORCED: bb2:
+; FORCED-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 0, [[BB1:%.*]] ]
+; FORCED-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[PHI]], i32 0
+; FORCED-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i1>
+; FORCED-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
+; FORCED-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i64
+; FORCED-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
+; FORCED-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i64
+; FORCED-NEXT: [[OR:%.*]] = or i64 [[TMP4]], [[TMP6]]
+; FORCED-NEXT: ret i64 [[OR]]
+;
; CHECK-LABEL: define i64 @wombat() {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB2:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
index 3c3dea3f1ea886..f2ea2df7cc982c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
@@ -1,7 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-100 -mtriple=x86_64-w64-windows-gnu < %s | FileCheck %s
+; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-100 -mtriple=x86_64-w64-windows-gnu\
+; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
define void @test(i16 %0) {
+; FORCED-LABEL: @test(
+; FORCED-NEXT: for.body92.preheader:
+; FORCED-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> <i16 0, i16 poison>, i16 [[TMP0:%.*]], i32 1
+; FORCED-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32>
+; FORCED-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
+; FORCED-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
+; FORCED-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, <4 x i32> [[TMP5]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+; FORCED-NEXT: br label [[FOR_BODY92:%.*]]
+; FORCED: for.body92:
+; FORCED-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]]
+; FORCED-NEXT: store <4 x i32> [[TMP7]], ptr undef, align 8
+; FORCED-NEXT: br label [[FOR_BODY92]]
+;
; CHECK-LABEL: @test(
; CHECK-NEXT: for.body92.preheader:
; CHECK-NEXT: br label [[FOR_BODY92:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
index 19a8aa9b618156..d7144d750321fe 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
@@ -1,7 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999 < %s | FileCheck %s
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999\
+; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
define i64 @foo() {
+; FORCED-LABEL: define i64 @foo() {
+; FORCED-NEXT: bb:
+; FORCED-NEXT: br label [[BB3:%.*]]
+; FORCED: bb1:
+; FORCED-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
+; FORCED-NEXT: ret i64 0
+; FORCED: bb3:
+; FORCED-NEXT: [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
+; FORCED-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
+; FORCED-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
+; FORCED-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
+; FORCED-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
+; FORCED-NEXT: [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT: [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
+; FORCED-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
+; FORCED-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]]
+; FORCED-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; FORCED-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
+; FORCED-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]]
+;
; CHECK-LABEL: define i64 @foo() {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB3:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
index 59cd1c0ccddf8c..1163c8219dabe5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
@@ -1,7 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu\
+; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
define void @foo() {
+; FORCED-LABEL: define void @foo() {
+; FORCED-NEXT: bb:
+; FORCED-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
+; FORCED-NEXT: br label [[BB1:%.*]]
+; FORCED: bb1:
+; FORCED-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
+; FORCED-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]]
+; FORCED-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]]
+; FORCED-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
+; FORCED-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
+; FORCED-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
+; FORCED-NEXT: br label [[BB4]]
+; FORCED: bb4:
+; FORCED-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]]
+; FORCED: bb5:
+; FORCED-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ]
+; FORCED-NEXT: ret void
+;
; CHECK-LABEL: define void @foo() {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB1:%.*]]
More information about the llvm-commits
mailing list