[llvm] a476032 - [SLP]Fix PR91025: correctly handle smin/smax of signed operands.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon May 6 08:14:37 PDT 2024
Author: Alexey Bataev
Date: 2024-05-06T08:10:20-07:00
New Revision: a4760321017bd55f51ea0961231ca48ce9e14624
URL: https://github.com/llvm/llvm-project/commit/a4760321017bd55f51ea0961231ca48ce9e14624
DIFF: https://github.com/llvm/llvm-project/commit/a4760321017bd55f51ea0961231ca48ce9e14624.diff
LOG: [SLP]Fix PR91025: correctly handle smin/smax of signed operands.
We need to check that a signed operand has an extra sign bit, to be sure
that we do not lose its signedness when trying to minimize the bitwidth
for smin/smax intrinsics.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ea132a89d2002b..bbfd088a799417 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15163,14 +15163,18 @@ bool BoUpSLP::collectValuesToDemote(
"Expected min/max intrinsics only.");
unsigned SignBits = OrigBitWidth - BitWidth;
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1);
- return SignBits <= ComputeNumSignBits(I->getOperand(0), *DL, 0, AC,
- nullptr, DT) &&
- (!isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL)) ||
+ unsigned Op0SignBits = ComputeNumSignBits(I->getOperand(0), *DL, 0, AC,
+ nullptr, DT);
+ unsigned Op1SignBits = ComputeNumSignBits(I->getOperand(1), *DL, 0, AC,
+ nullptr, DT);
+ return SignBits <= Op0SignBits &&
+ ((SignBits != Op0SignBits &&
+ !isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL))) ||
MaskedValueIsZero(I->getOperand(0), Mask,
SimplifyQuery(*DL))) &&
- SignBits <= ComputeNumSignBits(I->getOperand(1), *DL, 0, AC,
- nullptr, DT) &&
- (!isKnownNonNegative(I->getOperand(1), SimplifyQuery(*DL)) ||
+ SignBits <= Op1SignBits &&
+ ((SignBits != Op1SignBits &&
+ !isKnownNonNegative(I->getOperand(1), SimplifyQuery(*DL))) ||
MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)));
});
};
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll
index 41517b94accf1a..54b7b1192ec973 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll
@@ -6,14 +6,14 @@ define <4 x i32> @test(i16 %0, i16 %1) {
; CHECK-SAME: i16 [[TMP0:%.*]], i16 [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> <i16 poison, i16 0>, i16 [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[CONV15_I:%.*]] = sext i16 [[TMP0]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1>
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[CONV15_I]] to i16
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i16> <i16 0, i16 poison, i16 poison, i16 poison>, i16 [[TMP5]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
-; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[TMP4]], <4 x i16> [[TMP7]])
-; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i16> [[TMP8]] to <4 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[CONV15_I]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP5]], <4 x i32> [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i32> [[TMP8]], <i32 65535, i32 65535, i32 65535, i32 65535>
; CHECK-NEXT: ret <4 x i32> [[TMP9]]
;
entry:
More information about the llvm-commits mailing list