[llvm] 74e07ab - [SLP]Fix final analysis for unsigned nodes.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 18 10:09:08 PDT 2024
Author: Alexey Bataev
Date: 2024-04-18T10:05:54-07:00
New Revision: 74e07ab523122d6a8347b25770062ab331b6bb84
URL: https://github.com/llvm/llvm-project/commit/74e07ab523122d6a8347b25770062ab331b6bb84
DIFF: https://github.com/llvm/llvm-project/commit/74e07ab523122d6a8347b25770062ab331b6bb84.diff
LOG: [SLP]Fix final analysis for unsigned nodes.
We need to check that at least a single bit is cleared for unsigned nodes
before reducing their size. Otherwise, they might be treated as signed in
signed nodes.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 075fbd3bf868f1..25cb6bb599831a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14442,11 +14442,18 @@ bool BoUpSLP::collectValuesToDemote(
}
auto NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT);
unsigned BitWidth1 = OrigBitWidth - NumSignBits;
- if (!isKnownNonNegative(V, SimplifyQuery(*DL)))
+ bool IsSigned = !isKnownNonNegative(V, SimplifyQuery(*DL));
+ if (IsSigned)
++BitWidth1;
if (auto *I = dyn_cast<Instruction>(V)) {
APInt Mask = DB->getDemandedBits(I);
unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero();
+ while (!IsSigned && BitWidth2 < OrigBitWidth) {
+ APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth2 - 1);
+ if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL)))
+ break;
+ BitWidth2 *= 2;
+ }
BitWidth1 = std::min(BitWidth1, BitWidth2);
}
BitWidth = std::max(BitWidth, BitWidth1);
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll
index 3c02817f1ac190..4f855e20fac4db 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll
@@ -8,18 +8,20 @@ define void @test(ptr %p, i16 %load794) {
; CHECK-NEXT: [[GEP799:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[P]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr [[GEP799]], align 2
-; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i16> [[TMP2]], [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i16> [[TMP3]], <i16 3329, i16 3329>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i32> [[TMP7]], <i32 3329, i32 3329>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[ZEXT795]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = trunc <2 x i32> [[TMP6]] to <2 x i16>
-; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i16> [[TMP4]], [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i16> [[TMP8]] to <2 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i32> [[TMP8]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i32> [[TMP12]] to <2 x i64>
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw <2 x i64> [[TMP9]], <i64 5039, i64 5039>
; CHECK-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP10]], <i64 24, i64 24>
-; CHECK-NEXT: [[TMP12:%.*]] = trunc <2 x i64> [[TMP11]] to <2 x i16>
-; CHECK-NEXT: [[TMP13:%.*]] = mul <2 x i16> [[TMP12]], <i16 -3329, i16 -3329>
-; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i16> [[TMP13]], [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = trunc <2 x i64> [[TMP11]] to <2 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = mul <2 x i32> [[TMP13]], <i32 62207, i32 62207>
+; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = trunc <2 x i32> [[TMP21]] to <2 x i16>
; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i16> [[TMP14]], <i16 -3329, i16 -3329>
; CHECK-NEXT: [[TMP16:%.*]] = icmp slt <2 x i16> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer
More information about the llvm-commits
mailing list