[llvm] 41afef9 - [SLP]Fix PR87011: Missing sign extension of demoted type before zero extension

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 1 06:12:34 PDT 2024


Author: Alexey Bataev
Date: 2024-04-01T06:07:18-07:00
New Revision: 41afef9066eec8daf517ac357a628cdf30c95e39

URL: https://github.com/llvm/llvm-project/commit/41afef9066eec8daf517ac357a628cdf30c95e39
DIFF: https://github.com/llvm/llvm-project/commit/41afef9066eec8daf517ac357a628cdf30c95e39.diff

LOG: [SLP]Fix PR87011: Missing sign extension of demoted type before zero extension

Need to drop skipping of the first zext/sext nodes, it leads to
incorrect and less profitable code.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll
    llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/sext.ll
    llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/zext.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2bc0c5dcc6069d..1ffc39a9067431 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14263,11 +14263,9 @@ void BoUpSLP::computeMinimumValueSizes() {
   SmallVector<unsigned> RootDemotes;
   if (NodeIdx != 0 &&
       VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
-      (VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
-       VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
-       VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
+      VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
     assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
-    IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
+    IsTruncRoot = true;
     RootDemotes.push_back(NodeIdx);
     IsProfitableToDemoteRoot = true;
     ++NodeIdx;

diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll
index 436fba3261d602..1166b1fca826b6 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll
@@ -7,7 +7,7 @@ define void @test() {
 ; CHECK-LABEL: define void @test(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store <2 x i64> <i64 -1, i64 0>, ptr @h, align 8
+; CHECK-NEXT:    store <2 x i64> <i64 4294967295, i64 0>, ptr @h, align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
index 5ae0ad932fdddb..b64743aaa283c7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
@@ -11,20 +11,10 @@
 ;
 
 define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
-; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
-; SSE2-NEXT:    [[I0:%.*]] = load i8, ptr [[P0]], align 1
-; SSE2-NEXT:    [[I1:%.*]] = load i8, ptr [[P1]], align 1
-; SSE2-NEXT:    [[X0:%.*]] = sext i8 [[I0]] to i64
-; SSE2-NEXT:    [[X1:%.*]] = sext i8 [[I1]] to i64
-; SSE2-NEXT:    [[V0:%.*]] = insertelement <2 x i64> poison, i64 [[X0]], i32 0
-; SSE2-NEXT:    [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
-; SSE2-NEXT:    ret <2 x i64> [[V1]]
-;
-; SLM-LABEL: @loadext_2i8_to_2i64(
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
-; SLM-NEXT:    [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
-; SLM-NEXT:    ret <2 x i64> [[TMP3]]
+; SSE-LABEL: @loadext_2i8_to_2i64(
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i64>
+; SSE-NEXT:    ret <2 x i64> [[TMP2]]
 ;
 ; AVX-LABEL: @loadext_2i8_to_2i64(
 ; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
index 7d38aeb0c36357..744a50906cfc48 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
@@ -11,20 +11,10 @@
 ;
 
 define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
-; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
-; SSE2-NEXT:    [[I0:%.*]] = load i8, ptr [[P0]], align 1
-; SSE2-NEXT:    [[I1:%.*]] = load i8, ptr [[P1]], align 1
-; SSE2-NEXT:    [[X0:%.*]] = sext i8 [[I0]] to i64
-; SSE2-NEXT:    [[X1:%.*]] = sext i8 [[I1]] to i64
-; SSE2-NEXT:    [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
-; SSE2-NEXT:    [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
-; SSE2-NEXT:    ret <2 x i64> [[V1]]
-;
-; SLM-LABEL: @loadext_2i8_to_2i64(
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
-; SLM-NEXT:    [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
-; SLM-NEXT:    ret <2 x i64> [[TMP3]]
+; SSE-LABEL: @loadext_2i8_to_2i64(
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i64>
+; SSE-NEXT:    ret <2 x i64> [[TMP2]]
 ;
 ; AVX-LABEL: @loadext_2i8_to_2i64(
 ; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
index d1f6c41e5c30ec..27996a7064c0d8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
@@ -12,13 +12,8 @@
 
 define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
-; SSE2-NEXT:    [[I0:%.*]] = load i8, ptr [[P0]], align 1
-; SSE2-NEXT:    [[I1:%.*]] = load i8, ptr [[P1]], align 1
-; SSE2-NEXT:    [[X0:%.*]] = zext i8 [[I0]] to i64
-; SSE2-NEXT:    [[X1:%.*]] = zext i8 [[I1]] to i64
-; SSE2-NEXT:    [[V0:%.*]] = insertelement <2 x i64> poison, i64 [[X0]], i32 0
-; SSE2-NEXT:    [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
+; SSE2-NEXT:    [[V1:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64>
 ; SSE2-NEXT:    ret <2 x i64> [[V1]]
 ;
 ; SLM-LABEL: @loadext_2i8_to_2i64(

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
index 829e4bab20ffaf..94870420f2bfc6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
@@ -12,13 +12,8 @@
 
 define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
-; SSE2-NEXT:    [[I0:%.*]] = load i8, ptr [[P0]], align 1
-; SSE2-NEXT:    [[I1:%.*]] = load i8, ptr [[P1]], align 1
-; SSE2-NEXT:    [[X0:%.*]] = zext i8 [[I0]] to i64
-; SSE2-NEXT:    [[X1:%.*]] = zext i8 [[I1]] to i64
-; SSE2-NEXT:    [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
-; SSE2-NEXT:    [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
+; SSE2-NEXT:    [[V1:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64>
 ; SSE2-NEXT:    ret <2 x i64> [[V1]]
 ;
 ; SLM-LABEL: @loadext_2i8_to_2i64(


        


More information about the llvm-commits mailing list