[llvm] b6f5178 - [SLP]Fix signedness analysis for scalars in graph.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 15 07:14:00 PST 2023
Author: Alexey Bataev
Date: 2023-11-15T07:10:59-08:00
New Revision: b6f51787f6c8e77143f0aef6b58ddc7c55741d5c
URL: https://github.com/llvm/llvm-project/commit/b6f51787f6c8e77143f0aef6b58ddc7c55741d5c
DIFF: https://github.com/llvm/llvm-project/commit/b6f51787f6c8e77143f0aef6b58ddc7c55741d5c.diff
LOG: [SLP]Fix signedness analysis for scalars in graph.
The sign info computed for the roots cannot be used for all scalars in the
graph; the analysis needs to be performed for each particular scalar (tree node).
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b4e302b7f9baf11..79b437642a1db0f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13182,8 +13182,23 @@ void BoUpSLP::computeMinimumValueSizes() {
collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots);
// Finally, map the values we can demote to the maximum bit with we computed.
- for (auto *Scalar : ToDemote)
- MinBWs.try_emplace(Scalar, MaxBitWidth, !IsKnownPositive);
+ DenseMap<const TreeEntry *, bool> Signendness;
+ for (auto *Scalar : ToDemote) {
+ bool IsSigned = true;
+ if (auto *TE = getTreeEntry(Scalar)) {
+ auto It = Signendness.find(TE);
+ if (It != Signendness.end()) {
+ IsSigned = It->second;
+ } else {
+ IsSigned = any_of(TE->Scalars, [&](Value *R) {
+ KnownBits Known = computeKnownBits(R, *DL);
+ return !Known.isNonNegative();
+ });
+ Signendness.try_emplace(TE, IsSigned);
+ }
+ }
+ MinBWs.try_emplace(Scalar, MaxBitWidth, IsSigned);
+ }
}
PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
index b8be5bccc4c9037..d13597b43f9f72f 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
@@ -295,9 +295,9 @@ define i32 @gather_reduce_8x16_i64(ptr nocapture readonly %a, ptr nocapture read
; GENERIC-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
; GENERIC-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
-; GENERIC-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[TMP0]] to <8 x i32>
+; GENERIC-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
; GENERIC-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
-; GENERIC-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
+; GENERIC-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; GENERIC-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
; GENERIC-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
@@ -368,9 +368,9 @@ define i32 @gather_reduce_8x16_i64(ptr nocapture readonly %a, ptr nocapture read
; KRYO-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
; KRYO-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
-; KRYO-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[TMP0]] to <8 x i32>
+; KRYO-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
; KRYO-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
-; KRYO-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
+; KRYO-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; KRYO-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
; KRYO-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
; KRYO-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
index 3fff428ef79b6ee..63934a2cc96461e 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
@@ -235,9 +235,9 @@ define void @test_i16_extend(ptr %p.1, ptr %p.2, i32 %idx.i32) {
; CHECK-NEXT: [[T53:%.*]] = getelementptr inbounds i16, ptr [[P_1:%.*]], i64 [[IDX_0]]
; CHECK-NEXT: [[T56:%.*]] = getelementptr inbounds i16, ptr [[P_2:%.*]], i64 [[IDX_0]]
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[T53]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr [[T56]], align 2
-; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[TMP3]] to <8 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[TMP3]] to <8 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP5]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
More information about the llvm-commits mailing list