[llvm] b43ec8e - [SLP]Fix PR86798: handle phi nodes being trunced, but not its operands.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 27 07:25:19 PDT 2024
Author: Alexey Bataev
Date: 2024-03-27T07:21:45-07:00
New Revision: b43ec8e62b5f5a39be378c460339217511261400
URL: https://github.com/llvm/llvm-project/commit/b43ec8e62b5f5a39be378c460339217511261400
DIFF: https://github.com/llvm/llvm-project/commit/b43ec8e62b5f5a39be378c460339217511261400.diff
LOG: [SLP]Fix PR86798: handle phi nodes being trunced, but not its operands.
If the phi node is truncated but its operand(s) are not, this situation
needs to be handled in the assertion; the code already does the right transformation.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fbf1cb6a976ff9..e1f26b922dbe4d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11926,7 +11926,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
Value *Vec = vectorizeOperand(E, I, /*PostponedPHIs=*/true);
if (VecTy != Vec->getType()) {
- assert((getOperandEntry(E, I)->State == TreeEntry::NeedToGather ||
+ assert((It != MinBWs.end() ||
+ getOperandEntry(E, I)->State == TreeEntry::NeedToGather ||
MinBWs.contains(getOperandEntry(E, I))) &&
"Expected item in MinBWs.");
Vec = Builder.CreateIntCast(Vec, VecTy, GetOperandSignedness(I));
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll
new file mode 100644
index 00000000000000..f376ca71c77693
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+define i32 @test(ptr %b, ptr %c, i32 %0, ptr %a, i1 %tobool3.not) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ptr [[B:%.*]], ptr [[C:%.*]], i32 [[TMP0:%.*]], ptr [[A:%.*]], i1 [[TOBOOL3_NOT:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[TOBOOL3_NOT]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i16>
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i1> [[TMP8]] to <4 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL3_NOT]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i1> [[TMP10]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP7]], <4 x i32> [[TMP9]]
+; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[TMP12]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: [[TMP14:%.*]] = ashr <4 x i32> [[TMP13]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16>
+; CHECK-NEXT: br i1 true, label [[BB3]], label [[BB2]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i16> [ [[TMP5]], [[BB1]] ], [ [[TMP15]], [[BB2]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i16> [[TMP16]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = sext i16 [[TMP17]] to i32
+; CHECK-NEXT: store i32 [[TMP18]], ptr [[B]], align 16
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i16> [[TMP16]], i32 1
+; CHECK-NEXT: [[TMP20:%.*]] = sext i16 [[TMP19]] to i32
+; CHECK-NEXT: store i32 [[TMP20]], ptr [[A]], align 8
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i16> [[TMP16]], i32 2
+; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
+; CHECK-NEXT: store i32 [[TMP22]], ptr [[C]], align 16
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i16> [[TMP16]], i32 3
+; CHECK-NEXT: [[TMP24:%.*]] = sext i16 [[TMP23]] to i32
+; CHECK-NEXT: store i32 [[TMP24]], ptr [[B]], align 8
+; CHECK-NEXT: ret i32 0
+ br i1 %tobool3.not, label %bb1, label %bb2
+ %conv1.i.us = ashr i32 %0, 16
+ %cmp2.i.us = icmp slt i32 %conv1.i.us, %0
+ %sext26.us = zext i1 %cmp2.i.us to i32
+ %conv1.i.us.5 = ashr i32 %0, 16
+ %cmp2.i.us.5 = icmp slt i32 %conv1.i.us.5, %0
+ %sext26.us.5 = zext i1 %cmp2.i.us.5 to i32
+ %conv1.i.us.6 = ashr i32 %0, 16
+ %cmp2.i.us.6 = icmp slt i32 %conv1.i.us.6, %0
+ %sext26.us.6 = zext i1 %cmp2.i.us.6 to i32
+ %conv1.i.us.7 = ashr i32 %0, 16
+ %cmp2.i.us.7 = icmp slt i32 %conv1.i.us.7, %0
+ %sext26.us.7 = zext i1 %cmp2.i.us.7 to i32
+ br label %bb3
+ %cmp2.i = icmp sgt i32 %0, 0
+ %1 = zext i1 %cmp2.i to i32
+ %cond.i = select i1 %tobool3.not, i32 %0, i32 %1
+ %sext26 = shl i32 %cond.i, 16
+ %conv13 = ashr i32 %sext26, 16
+ %cmp2.i.5 = icmp sgt i32 %0, 0
+ %2 = zext i1 %cmp2.i.5 to i32
+ %cond.i.5 = select i1 %tobool3.not, i32 %0, i32 %2
+ %sext26.5 = shl i32 %cond.i.5, 16
+ %conv13.5 = ashr i32 %sext26.5, 16
+ %cmp2.i.6 = icmp sgt i32 %0, 0
+ %3 = zext i1 %cmp2.i.6 to i32
+ %cond.i.6 = select i1 %tobool3.not, i32 %0, i32 %3
+ %sext26.6 = shl i32 %cond.i.6, 16
+ %conv13.6 = ashr i32 %sext26.6, 16
+ %cmp2.i.7 = icmp sgt i32 %0, 0
+ %4 = zext i1 %cmp2.i.7 to i32
+ %cond.i.7 = select i1 %tobool3.not, i32 %0, i32 %4
+ %sext26.7 = shl i32 %cond.i.7, 16
+ %conv13.7 = ashr i32 %sext26.7, 16
+ br i1 true, label %bb3, label %bb2
+ %conv13p = phi i32 [ %sext26.us, %bb1 ], [ %conv13, %bb2 ]
+ %conv13.5p = phi i32 [ %sext26.us.5, %bb1 ], [ %conv13.5, %bb2 ]
+ %conv13.6p = phi i32 [ %sext26.us.6, %bb1 ], [ %conv13.6, %bb2 ]
+ %conv13.7p = phi i32 [ %sext26.us.7, %bb1 ], [ %conv13.7, %bb2 ]
+ store i32 %conv13p, ptr %b, align 16
+ store i32 %conv13.5p, ptr %a, align 8
+ store i32 %conv13.6p, ptr %c, align 16
+ store i32 %conv13.7p, ptr %b, align 8
+ ret i32 0
More information about the llvm-commits
mailing list