[llvm] a36a67c - [SLP]Fix the analysis of the user buildvector nodes for minbitwidth
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 28 13:25:36 PST 2025
Author: Alexey Bataev
Date: 2025-02-28T13:17:14-08:00
New Revision: a36a67c79afaa1fdd0dbe0440ec852fd4eb3a532
URL: https://github.com/llvm/llvm-project/commit/a36a67c79afaa1fdd0dbe0440ec852fd4eb3a532
DIFF: https://github.com/llvm/llvm-project/commit/a36a67c79afaa1fdd0dbe0440ec852fd4eb3a532.diff
LOG: [SLP]Fix the analysis of the user buildvector nodes for minbitwidth
If the user node is a buildvector/gather node that has no internal
instruction state, we need to check for the missing state explicitly and
use the type of the node's own scalars, not the type of its operands.
Fixes #129242
Added:
llvm/test/Transforms/SLPVectorizer/X86/user-buildvector-with-minbiwidth.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4a68e0ee5989c..4f3fd6d539097 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11425,11 +11425,14 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
E->Idx != 0 &&
(E->getOpcode() != Instruction::Load || E->UserTreeIndex)) {
const EdgeInfo &EI = E->UserTreeIndex;
- if (EI.UserTE->getOpcode() != Instruction::Select ||
+ if (!EI.UserTE->hasState() ||
+ EI.UserTE->getOpcode() != Instruction::Select ||
EI.EdgeIdx != 0) {
auto UserBWIt = MinBWs.find(EI.UserTE);
Type *UserScalarTy =
- EI.UserTE->getOperand(EI.EdgeIdx).front()->getType();
+ EI.UserTE->isGather()
+ ? EI.UserTE->Scalars.front()->getType()
+ : EI.UserTE->getOperand(EI.EdgeIdx).front()->getType();
if (UserBWIt != MinBWs.end())
UserScalarTy = IntegerType::get(ScalarTy->getContext(),
UserBWIt->second.first);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/user-buildvector-with-minbiwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/user-buildvector-with-minbiwidth.ll
new file mode 100644
index 0000000000000..06068c1aa3fa8
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/user-buildvector-with-minbiwidth.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @test(i8 %0, i32 %conv2, i1 %cmp.i, i64 %shl.i) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i8 [[TMP0:%.*]], i32 [[CONV2:%.*]], i1 [[CMP_I:%.*]], i64 [[SHL_I:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV21:%.*]] = sext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[CONV7:%.*]] = zext i32 [[CONV2]] to i64
+; CHECK-NEXT: [[COND_I:%.*]] = shl i64 [[CONV7]], [[SHL_I]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[COND_I]], 4294967295
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 1
+; CHECK-NEXT: [[CONV7_1:%.*]] = zext i32 [[CONV2]] to i64
+; CHECK-NEXT: [[COND_I_1:%.*]] = shl i64 [[CONV7_1]], [[SHL_I]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[COND_I_1]], 4294967295
+; CHECK-NEXT: [[DOTNOT_1:%.*]] = icmp eq i64 [[TMP2]], 1
+; CHECK-NEXT: [[SUB_2:%.*]] = or i32 [[CONV21]], 1
+; CHECK-NEXT: [[COND_2:%.*]] = select i1 [[CMP_I]], i32 [[SUB_2]], i32 0
+; CHECK-NEXT: [[CONV7_2:%.*]] = zext i32 [[COND_2]] to i64
+; CHECK-NEXT: [[CMP_I_2:%.*]] = icmp slt i32 [[COND_2]], 1
+; CHECK-NEXT: [[SHL_I_2:%.*]] = zext i1 [[CMP_I_2]] to i64
+; CHECK-NEXT: [[COND_I_2:%.*]] = shl i64 [[CONV7_2]], [[SHL_I_2]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[COND_I_2]], 4294967295
+; CHECK-NEXT: [[DOTNOT_2:%.*]] = icmp eq i64 [[TMP3]], 1
+; CHECK-NEXT: [[SUB_3:%.*]] = or i32 [[CONV21]], 1
+; CHECK-NEXT: [[COND_3:%.*]] = select i1 [[CMP_I]], i32 [[SUB_3]], i32 0
+; CHECK-NEXT: [[CONV7_3:%.*]] = zext i32 [[COND_3]] to i64
+; CHECK-NEXT: [[CMP_I_3:%.*]] = icmp slt i32 [[COND_3]], 1
+; CHECK-NEXT: [[SHL_I_3:%.*]] = zext i1 [[CMP_I_3]] to i64
+; CHECK-NEXT: [[COND_I_3:%.*]] = shl i64 [[CONV7_3]], [[SHL_I_3]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[COND_I_3]], 4294967295
+; CHECK-NEXT: [[DOTNOT_3:%.*]] = icmp eq i64 [[TMP4]], 1
+; CHECK-NEXT: [[SUB_4:%.*]] = or i32 [[CONV21]], 1
+; CHECK-NEXT: [[COND_4:%.*]] = select i1 [[CMP_I]], i32 [[SUB_4]], i32 0
+; CHECK-NEXT: [[CONV7_4:%.*]] = zext i32 [[COND_4]] to i64
+; CHECK-NEXT: [[CMP_I_4:%.*]] = icmp slt i32 [[COND_4]], 1
+; CHECK-NEXT: [[SHL_I_4:%.*]] = zext i1 [[CMP_I_4]] to i64
+; CHECK-NEXT: [[COND_I_4:%.*]] = shl i64 [[CONV7_4]], [[SHL_I_4]]
+; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[COND_I_4]], 4294967295
+; CHECK-NEXT: [[DOTNOT_4:%.*]] = icmp eq i64 [[TMP5]], 1
+; CHECK-NEXT: [[SUB_5:%.*]] = or i32 [[CONV21]], 1
+; CHECK-NEXT: [[COND_5:%.*]] = select i1 [[CMP_I]], i32 [[SUB_5]], i32 0
+; CHECK-NEXT: [[CONV7_5:%.*]] = zext i32 [[COND_5]] to i64
+; CHECK-NEXT: [[CMP_I_5:%.*]] = icmp slt i32 [[COND_5]], 1
+; CHECK-NEXT: [[SHL_I_5:%.*]] = zext i1 [[CMP_I_5]] to i64
+; CHECK-NEXT: [[COND_I_5:%.*]] = shl i64 [[CONV7_5]], [[SHL_I_5]]
+; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[COND_I_5]], 4294967295
+; CHECK-NEXT: [[DOTNOT_5:%.*]] = icmp eq i64 [[TMP6]], 1
+; CHECK-NEXT: [[SUB_6:%.*]] = or i32 [[CONV21]], 1
+; CHECK-NEXT: [[COND_6:%.*]] = select i1 [[CMP_I]], i32 [[SUB_6]], i32 0
+; CHECK-NEXT: [[CONV7_6:%.*]] = zext i32 [[COND_6]] to i64
+; CHECK-NEXT: [[CMP_I_6:%.*]] = icmp slt i32 [[COND_6]], 1
+; CHECK-NEXT: [[SHL_I_6:%.*]] = zext i1 [[CMP_I_6]] to i64
+; CHECK-NEXT: [[COND_I_6:%.*]] = shl i64 [[CONV7_6]], [[SHL_I_6]]
+; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[COND_I_6]], 4294967295
+; CHECK-NEXT: [[DOTNOT_6:%.*]] = icmp eq i64 [[TMP7]], 1
+; CHECK-NEXT: [[SUB_7:%.*]] = or i32 [[CONV21]], 1
+; CHECK-NEXT: [[COND_7:%.*]] = select i1 [[CMP_I]], i32 [[SUB_7]], i32 0
+; CHECK-NEXT: [[CONV7_7:%.*]] = zext i32 [[COND_7]] to i64
+; CHECK-NEXT: [[CMP_I_7:%.*]] = icmp slt i32 [[COND_7]], 1
+; CHECK-NEXT: [[SHL_I_7:%.*]] = zext i1 [[CMP_I_7]] to i64
+; CHECK-NEXT: [[COND_I_7:%.*]] = shl i64 [[CONV7_7]], [[SHL_I_7]]
+; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[COND_I_7]], 4294967295
+; CHECK-NEXT: [[DOTNOT_7:%.*]] = icmp eq i64 [[TMP8]], 1
+; CHECK-NEXT: [[SUB_8:%.*]] = or i32 [[CONV21]], 1
+; CHECK-NEXT: [[COND_8:%.*]] = select i1 [[CMP_I]], i32 [[SUB_8]], i32 0
+; CHECK-NEXT: [[CONV7_8:%.*]] = zext i32 [[COND_8]] to i64
+; CHECK-NEXT: [[CMP_I_8:%.*]] = icmp slt i32 [[COND_8]], 1
+; CHECK-NEXT: [[SHL_I_8:%.*]] = zext i1 [[CMP_I_8]] to i64
+; CHECK-NEXT: [[COND_I_8:%.*]] = shl i64 [[CONV7_8]], [[SHL_I_8]]
+; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[COND_I_8]], 4294967295
+; CHECK-NEXT: [[DOTNOT_8:%.*]] = icmp eq i64 [[TMP9]], 1
+; CHECK-NEXT: [[SUB_9:%.*]] = or i32 [[CONV21]], 1
+; CHECK-NEXT: [[COND_9:%.*]] = select i1 [[CMP_I]], i32 [[SUB_9]], i32 0
+; CHECK-NEXT: [[CONV7_9:%.*]] = zext i32 [[COND_9]] to i64
+; CHECK-NEXT: [[CMP_I_9:%.*]] = icmp slt i32 [[COND_9]], 1
+; CHECK-NEXT: [[SHL_I_9:%.*]] = zext i1 [[CMP_I_9]] to i64
+; CHECK-NEXT: [[COND_I_9:%.*]] = shl i64 [[CONV7_9]], [[SHL_I_9]]
+; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[COND_I_9]], 4294967295
+; CHECK-NEXT: [[DOTNOT_9:%.*]] = icmp eq i64 [[TMP10]], 1
+; CHECK-NEXT: br label %[[WHILE_BODY:.*]]
+; CHECK: [[WHILE_BODY]]:
+; CHECK-NEXT: br i1 [[DOTNOT]], label %[[FOR_INC:.*]], label %[[IF_THEN10:.*]]
+; CHECK: [[IF_THEN10]]:
+; CHECK-NEXT: br label %[[FOR_INC]]
+; CHECK: [[FOR_INC]]:
+; CHECK-NEXT: br i1 [[DOTNOT_1]], label %[[FOR_INC_1:.*]], label %[[IF_THEN10_1:.*]]
+; CHECK: [[IF_THEN10_1]]:
+; CHECK-NEXT: br label %[[FOR_INC_1]]
+; CHECK: [[FOR_INC_1]]:
+; CHECK-NEXT: br i1 [[DOTNOT_2]], label %[[FOR_INC_2:.*]], label %[[IF_THEN10_2:.*]]
+; CHECK: [[IF_THEN10_2]]:
+; CHECK-NEXT: br label %[[FOR_INC_2]]
+; CHECK: [[FOR_INC_2]]:
+; CHECK-NEXT: br i1 [[DOTNOT_3]], label %[[FOR_INC_3:.*]], label %[[IF_THEN10_3:.*]]
+; CHECK: [[IF_THEN10_3]]:
+; CHECK-NEXT: br label %[[FOR_INC_3]]
+; CHECK: [[FOR_INC_3]]:
+; CHECK-NEXT: br i1 [[DOTNOT_4]], label %[[FOR_INC_4:.*]], label %[[IF_THEN10_4:.*]]
+; CHECK: [[IF_THEN10_4]]:
+; CHECK-NEXT: br label %[[FOR_INC_4]]
+; CHECK: [[FOR_INC_4]]:
+; CHECK-NEXT: br i1 [[DOTNOT_5]], label %[[FOR_INC_5:.*]], label %[[IF_THEN10_5:.*]]
+; CHECK: [[IF_THEN10_5]]:
+; CHECK-NEXT: br label %[[FOR_INC_5]]
+; CHECK: [[FOR_INC_5]]:
+; CHECK-NEXT: br i1 [[DOTNOT_6]], label %[[FOR_INC_6:.*]], label %[[IF_THEN10_6:.*]]
+; CHECK: [[IF_THEN10_6]]:
+; CHECK-NEXT: br label %[[FOR_INC_6]]
+; CHECK: [[FOR_INC_6]]:
+; CHECK-NEXT: br i1 [[DOTNOT_7]], label %[[FOR_INC_7:.*]], label %[[IF_THEN10_7:.*]]
+; CHECK: [[IF_THEN10_7]]:
+; CHECK-NEXT: br label %[[FOR_INC_7]]
+; CHECK: [[FOR_INC_7]]:
+; CHECK-NEXT: br i1 [[DOTNOT_8]], label %[[FOR_INC_8:.*]], label %[[IF_THEN10_8:.*]]
+; CHECK: [[IF_THEN10_8]]:
+; CHECK-NEXT: br label %[[FOR_INC_8]]
+; CHECK: [[FOR_INC_8]]:
+; CHECK-NEXT: br i1 [[DOTNOT_9]], label %[[WHILE_BODY]], label %[[IF_THEN10_9:.*]]
+; CHECK: [[IF_THEN10_9]]:
+; CHECK-NEXT: br label %[[WHILE_BODY]]
+;
+entry:
+ %conv21 = sext i8 %0 to i32
+ %conv7 = zext i32 %conv2 to i64
+ %cond.i = shl i64 %conv7, %shl.i
+ %1 = and i64 %cond.i, 4294967295
+ %.not = icmp eq i64 %1, 1
+ %conv7.1 = zext i32 %conv2 to i64
+ %cond.i.1 = shl i64 %conv7.1, %shl.i
+ %2 = and i64 %cond.i.1, 4294967295
+ %.not.1 = icmp eq i64 %2, 1
+ %sub.2 = or i32 %conv21, 1
+ %cond.2 = select i1 %cmp.i, i32 %sub.2, i32 0
+ %conv7.2 = zext i32 %cond.2 to i64
+ %cmp.i.2 = icmp slt i32 %cond.2, 1
+ %shl.i.2 = zext i1 %cmp.i.2 to i64
+ %cond.i.2 = shl i64 %conv7.2, %shl.i.2
+ %3 = and i64 %cond.i.2, 4294967295
+ %.not.2 = icmp eq i64 %3, 1
+ %sub.3 = or i32 %conv21, 1
+ %cond.3 = select i1 %cmp.i, i32 %sub.3, i32 0
+ %conv7.3 = zext i32 %cond.3 to i64
+ %cmp.i.3 = icmp slt i32 %cond.3, 1
+ %shl.i.3 = zext i1 %cmp.i.3 to i64
+ %cond.i.3 = shl i64 %conv7.3, %shl.i.3
+ %4 = and i64 %cond.i.3, 4294967295
+ %.not.3 = icmp eq i64 %4, 1
+ %sub.4 = or i32 %conv21, 1
+ %cond.4 = select i1 %cmp.i, i32 %sub.4, i32 0
+ %conv7.4 = zext i32 %cond.4 to i64
+ %cmp.i.4 = icmp slt i32 %cond.4, 1
+ %shl.i.4 = zext i1 %cmp.i.4 to i64
+ %cond.i.4 = shl i64 %conv7.4, %shl.i.4
+ %5 = and i64 %cond.i.4, 4294967295
+ %.not.4 = icmp eq i64 %5, 1
+ %sub.5 = or i32 %conv21, 1
+ %cond.5 = select i1 %cmp.i, i32 %sub.5, i32 0
+ %conv7.5 = zext i32 %cond.5 to i64
+ %cmp.i.5 = icmp slt i32 %cond.5, 1
+ %shl.i.5 = zext i1 %cmp.i.5 to i64
+ %cond.i.5 = shl i64 %conv7.5, %shl.i.5
+ %6 = and i64 %cond.i.5, 4294967295
+ %.not.5 = icmp eq i64 %6, 1
+ %sub.6 = or i32 %conv21, 1
+ %cond.6 = select i1 %cmp.i, i32 %sub.6, i32 0
+ %conv7.6 = zext i32 %cond.6 to i64
+ %cmp.i.6 = icmp slt i32 %cond.6, 1
+ %shl.i.6 = zext i1 %cmp.i.6 to i64
+ %cond.i.6 = shl i64 %conv7.6, %shl.i.6
+ %7 = and i64 %cond.i.6, 4294967295
+ %.not.6 = icmp eq i64 %7, 1
+ %sub.7 = or i32 %conv21, 1
+ %cond.7 = select i1 %cmp.i, i32 %sub.7, i32 0
+ %conv7.7 = zext i32 %cond.7 to i64
+ %cmp.i.7 = icmp slt i32 %cond.7, 1
+ %shl.i.7 = zext i1 %cmp.i.7 to i64
+ %cond.i.7 = shl i64 %conv7.7, %shl.i.7
+ %8 = and i64 %cond.i.7, 4294967295
+ %.not.7 = icmp eq i64 %8, 1
+ %sub.8 = or i32 %conv21, 1
+ %cond.8 = select i1 %cmp.i, i32 %sub.8, i32 0
+ %conv7.8 = zext i32 %cond.8 to i64
+ %cmp.i.8 = icmp slt i32 %cond.8, 1
+ %shl.i.8 = zext i1 %cmp.i.8 to i64
+ %cond.i.8 = shl i64 %conv7.8, %shl.i.8
+ %9 = and i64 %cond.i.8, 4294967295
+ %.not.8 = icmp eq i64 %9, 1
+ %sub.9 = or i32 %conv21, 1
+ %cond.9 = select i1 %cmp.i, i32 %sub.9, i32 0
+ %conv7.9 = zext i32 %cond.9 to i64
+ %cmp.i.9 = icmp slt i32 %cond.9, 1
+ %shl.i.9 = zext i1 %cmp.i.9 to i64
+ %cond.i.9 = shl i64 %conv7.9, %shl.i.9
+ %10 = and i64 %cond.i.9, 4294967295
+ %.not.9 = icmp eq i64 %10, 1
+ br label %while.body
+
+while.body:
+ br i1 %.not, label %for.inc, label %if.then10
+
+if.then10:
+ br label %for.inc
+
+for.inc:
+ br i1 %.not.1, label %for.inc.1, label %if.then10.1
+
+if.then10.1:
+ br label %for.inc.1
+
+for.inc.1:
+ br i1 %.not.2, label %for.inc.2, label %if.then10.2
+
+if.then10.2:
+ br label %for.inc.2
+
+for.inc.2:
+ br i1 %.not.3, label %for.inc.3, label %if.then10.3
+
+if.then10.3:
+ br label %for.inc.3
+
+for.inc.3:
+ br i1 %.not.4, label %for.inc.4, label %if.then10.4
+
+if.then10.4:
+ br label %for.inc.4
+
+for.inc.4:
+ br i1 %.not.5, label %for.inc.5, label %if.then10.5
+
+if.then10.5:
+ br label %for.inc.5
+
+for.inc.5:
+ br i1 %.not.6, label %for.inc.6, label %if.then10.6
+
+if.then10.6:
+ br label %for.inc.6
+
+for.inc.6:
+ br i1 %.not.7, label %for.inc.7, label %if.then10.7
+
+if.then10.7:
+ br label %for.inc.7
+
+for.inc.7:
+ br i1 %.not.8, label %for.inc.8, label %if.then10.8
+
+if.then10.8:
+ br label %for.inc.8
+
+for.inc.8:
+ br i1 %.not.9, label %while.body, label %if.then10.9
+
+if.then10.9:
+ br label %while.body
+}
More information about the llvm-commits
mailing list