[llvm] f564a48 - [SLP]Fix PR108700: correctly identify id of the operand node
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 16 09:44:56 PDT 2024
Author: Alexey Bataev
Date: 2024-09-16T09:44:47-07:00
New Revision: f564a48f0ea4d2100c0cadfa6e6f20f97244025e
URL: https://github.com/llvm/llvm-project/commit/f564a48f0ea4d2100c0cadfa6e6f20f97244025e
DIFF: https://github.com/llvm/llvm-project/commit/f564a48f0ea4d2100c0cadfa6e6f20f97244025e.diff
LOG: [SLP]Fix PR108700: correctly identify id of the operand node
If the operand node for truncs is not created during tree construction but
one of the previously built nodes is reused instead, its index must be
identified correctly so that code emission uses the right node.
Fixes https://github.com/llvm/llvm-project/issues/108700
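For readers following along, here is a minimal stand-alone sketch (not LLVM
code; TreeEntry, BuildOperand and the reuse map below are simplified
stand-ins for the SLP vectorizer's internals) of the indexing assumption
that broke: the old code recorded VectorizableTree.size() + 1 as the trunc
operand's node index before that operand was actually built, which only
holds when a fresh node is appended; when an existing node is reused, the
recorded index points at the wrong entry, so the patch builds the operands
first and then records the index the operand entry actually received.

    // Illustrative model only, assuming a simple dedup map stands in for
    // node reuse in the vectorizable tree.
    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    struct TreeEntry {
      unsigned Idx;    // position in the vectorizable tree
      std::string Key; // stands in for the bundled scalars
    };

    int main() {
      std::vector<TreeEntry> Tree;          // models VectorizableTree
      std::map<std::string, unsigned> Seen; // bundle -> existing node index
      std::set<unsigned> ExtraBitWidthNodes;

      auto BuildOperand = [&](const std::string &Key) -> unsigned {
        // If an equivalent node already exists, it is reused instead of
        // creating a new one at the end of the tree.
        auto It = Seen.find(Key);
        if (It != Seen.end())
          return It->second;
        unsigned Idx = Tree.size();
        Tree.push_back({Idx, Key});
        Seen.emplace(Key, Idx);
        return Idx;
      };

      BuildOperand("load-bundle"); // node 0, reused later

      // Build a trunc node whose single operand is the already-seen bundle.
      unsigned TruncIdx = Tree.size();
      Tree.push_back({TruncIdx, "trunc-bundle"});

      // Old logic: assume the operand node will be appended right after the
      // trunc node (the "+ 1" in the original was computed before the trunc
      // entry itself was appended).
      unsigned GuessedOperandIdx = Tree.size();

      // New logic: build the operand first, then use the index it actually
      // received, which handles the reused-node case.
      unsigned ActualOperandIdx = BuildOperand("load-bundle"); // reused -> 0

      std::printf("guessed %u, actual %u\n", GuessedOperandIdx,
                  ActualOperandIdx);
      ExtraBitWidthNodes.insert(ActualOperandIdx);
      return 0;
    }

Running the sketch prints different guessed and actual indices, which is the
mismatch the patch eliminates by querying getOperandEntry(TE, 0)->Idx after
the operands have been built.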
Added:
llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-user-not-min.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5f2bf082fb87f0..282bb8eac7e2e4 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7481,7 +7481,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
PrevMaxBW),
std::min<unsigned>(DL->getTypeSizeInBits(VL0->getType()),
PrevMinBW));
- ExtraBitWidthNodes.insert(VectorizableTree.size() + 1);
+ }
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndices);
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
+
+ TE->setOperandsInOrder();
+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+ buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
+ if (ShuffleOrOp == Instruction::Trunc) {
+ ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
} else if (ShuffleOrOp == Instruction::SIToFP ||
ShuffleOrOp == Instruction::UIToFP) {
unsigned NumSignBits =
@@ -7492,15 +7501,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
if (NumSignBits * 2 >=
DL->getTypeSizeInBits(VL0->getOperand(0)->getType()))
- ExtraBitWidthNodes.insert(VectorizableTree.size() + 1);
+ ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
}
- TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndices);
- LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
-
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
- buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
case Instruction::ICmp:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-user-not-min.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-user-not-min.ll
index 50b19d01ad58f1..6922df8991b831 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-user-not-min.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-user-not-min.ll
@@ -6,10 +6,10 @@ define void @test(ptr %block, ptr noalias %pixels, i1 %b) {
; CHECK-SAME: ptr [[BLOCK:%.*]], ptr noalias [[PIXELS:%.*]], i1 [[B:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i1> <i1 true, i1 poison, i1 false, i1 false>, i1 [[B]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i1> [[TMP0]] to <4 x i8>
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[BLOCK]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i16> [[TMP2]] to <4 x i8>
-; CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i1> [[TMP0]] to <4 x i8>
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP3]], <4 x i8> [[TMP4]], <4 x i8> [[TMP1]]
; CHECK-NEXT: store <4 x i8> [[TMP5]], ptr [[PIXELS]], align 1
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll b/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll
new file mode 100644
index 00000000000000..4b62ef688ca44f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i16 @test() {
+; CHECK-LABEL: define i16 @test() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> <i1 false, i1 false, i1 poison, i1 poison>, <2 x i1> zeroinitializer, i64 2)
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> zeroinitializer, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> zeroinitializer, <4 x i1> [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i1> [[TMP7]] to <4 x i16>
+; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> [[TMP8]])
+; CHECK-NEXT: ret i16 [[TMP9]]
+;
+entry:
+ %conv73 = xor i64 0, 0
+ %and.i = and i64 0, 0
+ %xor2.i = or i64 %and.i, 0
+ %sub.i = or i64 %xor2.i, 0
+ %xor3.i = xor i64 %sub.i, %conv73
+ %and4.i = and i64 %xor3.i, 0
+ %cmp.i = icmp slt i64 %and4.i, 0
+ %0 = trunc i64 %conv73 to i16
+ %1 = or i16 0, %0
+ %conv73i = xor i64 0, 0
+ %andi.i = and i64 0, 0
+ %xor2i.i = or i64 %andi.i, 0
+ %subi.i = or i64 %xor2i.i, 0
+ %xor3i.i = xor i64 %subi.i, %conv73i
+ %and4i.i = and i64 %xor3i.i, 0
+ %cmpi.i = icmp slt i64 %and4i.i, 0
+ %2 = trunc i64 %conv73i to i16
+ %3 = or i16 0, %2
+ %4 = select i1 %cmpi.i, i16 0, i16 %3
+ %5 = select i1 %cmp.i, i16 0, i16 %1
+ %6 = zext i32 0 to i64
+ %add.ip = or i64 %6, 0
+ %orp = or i64 %add.ip, 0
+ %conv72p = shl i64 %orp, 0
+ %sextp = ashr i64 %conv72p, 0
+ %conv73p = xor i64 %sextp, 0
+ %and.ip = and i64 0, 0
+ %xor2.ip = or i64 %and.ip, 0
+ %sub.ip = or i64 %xor2.ip, 0
+ %xor3.ip = xor i64 %sub.ip, %conv73p
+ %and4.ip = and i64 %xor3.ip, 0
+ %cmp.ip = icmp slt i64 %and4.ip, 0
+ %7 = trunc i64 %conv73p to i16
+ %8 = or i16 0, %7
+ %9 = select i1 %cmp.ip, i16 0, i16 %8
+ %conv76i = and i16 %4, %5
+ %conv76p = and i16 %conv76i, %9
+ %10 = zext i32 0 to i64
+ %add.ip1 = or i64 %10, 0
+ %orp1 = or i64 %add.ip1, 0
+ %conv72p1 = shl i64 %orp1, 0
+ %sextp1 = ashr i64 %conv72p1, 0
+ %conv73p1 = xor i64 %sextp1, 0
+ %and.ip1 = and i64 0, 0
+ %xor2.ip1 = or i64 %and.ip1, 0
+ %sub.ip1 = or i64 %xor2.ip1, 0
+ %xor3.ip1 = xor i64 %sub.ip1, %conv73p1
+ %and4.ip1 = and i64 %xor3.ip1, 0
+ %cmp.ip1 = icmp slt i64 %and4.ip1, 0
+ %11 = trunc i64 %conv73p1 to i16
+ %12 = or i16 0, %11
+ %13 = select i1 %cmp.ip1, i16 0, i16 %12
+ %conv76p2 = and i16 %conv76p, %13
+ ret i16 %conv76p2
+}