[llvm] 342f7d0 - [SLP]Fix PR86620: check final minbitwidth for truncs/exts before

Tue Mar 26 11:31:08 PDT 2024

Author: Alexey Bataev
Date: 2024-03-26T11:27:17-07:00
New Revision: 342f7d0d35774ffdcb56e8b92252763a59bd2c29

URL: https://github.com/llvm/llvm-project/commit/342f7d0d35774ffdcb56e8b92252763a59bd2c29
DIFF: https://github.com/llvm/llvm-project/commit/342f7d0d35774ffdcb56e8b92252763a59bd2c29.diff

LOG: [SLP]Fix PR86620: check final minbitwidth for truncs/exts before
accepting it.

If the minbitwidth is deduced from the demanded elements, we need to check
the final bitwidth for the trunc/ext instruction, not blindly accept
the used one.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll
    llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 551d9757075639..fbf1cb6a976ff9 100644

--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14092,12 +14092,14 @@ bool BoUpSLP::collectValuesToDemote(
       MaxDepthLevel = 1;
     if (IsProfitableToDemoteRoot)
       IsProfitableToDemote = true;
+    (void)IsPotentiallyTruncated(V, BitWidth);
     break;
   case Instruction::ZExt:
   case Instruction::SExt:
     if (!IsTruncRoot)
       MaxDepthLevel = 1;
     IsProfitableToDemote = true;
+    (void)IsPotentiallyTruncated(V, BitWidth);
     break;
 
   // We can demote certain binary operations if we can demote both of their

diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll
index 04d275742832ef..2d69c7c984dcd2 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll
@@ -8,11 +8,10 @@ define i32 @test() {
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i16> [[TMP1]], <i16 -1, i16 -1, i16 -1, i16 -1>
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i16> [[TMP2]], <i16 -1, i16 -1, i16 -1, i16 -1>
-; CHECK-NEXT:    [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]])
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i16 [[TMP4]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP5]], i32 1)
 ; CHECK-NEXT:    ret i32 [[TMP6]]
 ;

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll
index 5ee80160765387..f4a471493f1b3f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-6 < %s | FileCheck %s
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-9 < %s | FileCheck %s
 
 define void @t(i64 %v) {
 ; CHECK-LABEL: define void @t(
@@ -7,10 +7,9 @@ define void @t(i64 %v) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i16>
-; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i16> [[TMP2]], <i16 5, i16 6, i16 3, i16 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP3]])
-; CHECK-NEXT:    [[TMP5:%.*]] = sext i16 [[TMP4]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], <i32 5, i32 6, i32 3, i32 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], 65535
 ; CHECK-NEXT:    store i32 [[TMP6]], ptr null, align 4
 ; CHECK-NEXT:    ret void