[llvm] 41afef9 - [SLP]Fix PR87011: Missing sign extension of demoted type before zero extension
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 1 06:12:34 PDT 2024
Author: Alexey Bataev
Date: 2024-04-01T06:07:18-07:00
New Revision: 41afef9066eec8daf517ac357a628cdf30c95e39
URL: https://github.com/llvm/llvm-project/commit/41afef9066eec8daf517ac357a628cdf30c95e39
DIFF: https://github.com/llvm/llvm-project/commit/41afef9066eec8daf517ac357a628cdf30c95e39.diff
LOG: [SLP]Fix PR87011: Missing sign extension of demoted type before zero extension
We need to stop skipping the first zext/sext nodes, because skipping them
leads to incorrect and less profitable code.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll
llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/sext.ll
llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/zext.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2bc0c5dcc6069d..1ffc39a9067431 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14263,11 +14263,9 @@ void BoUpSLP::computeMinimumValueSizes() {
SmallVector<unsigned> RootDemotes;
if (NodeIdx != 0 &&
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
- (VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
- VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
- VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
+ VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
- IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
+ IsTruncRoot = true;
RootDemotes.push_back(NodeIdx);
IsProfitableToDemoteRoot = true;
++NodeIdx;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll
index 436fba3261d602..1166b1fca826b6 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll
@@ -7,7 +7,7 @@ define void @test() {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: store <2 x i64> <i64 -1, i64 0>, ptr @h, align 8
+; CHECK-NEXT: store <2 x i64> <i64 4294967295, i64 0>, ptr @h, align 8
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
index 5ae0ad932fdddb..b64743aaa283c7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
@@ -11,20 +11,10 @@
;
define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
-; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
-; SSE2-NEXT: [[I0:%.*]] = load i8, ptr [[P0]], align 1
-; SSE2-NEXT: [[I1:%.*]] = load i8, ptr [[P1]], align 1
-; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64
-; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64
-; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> poison, i64 [[X0]], i32 0
-; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
-; SSE2-NEXT: ret <2 x i64> [[V1]]
-;
-; SLM-LABEL: @loadext_2i8_to_2i64(
-; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
-; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
-; SLM-NEXT: ret <2 x i64> [[TMP3]]
+; SSE-LABEL: @loadext_2i8_to_2i64(
+; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
+; SSE-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i64>
+; SSE-NEXT: ret <2 x i64> [[TMP2]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
index 7d38aeb0c36357..744a50906cfc48 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
@@ -11,20 +11,10 @@
;
define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
-; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
-; SSE2-NEXT: [[I0:%.*]] = load i8, ptr [[P0]], align 1
-; SSE2-NEXT: [[I1:%.*]] = load i8, ptr [[P1]], align 1
-; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64
-; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64
-; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
-; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
-; SSE2-NEXT: ret <2 x i64> [[V1]]
-;
-; SLM-LABEL: @loadext_2i8_to_2i64(
-; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
-; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
-; SLM-NEXT: ret <2 x i64> [[TMP3]]
+; SSE-LABEL: @loadext_2i8_to_2i64(
+; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
+; SSE-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i64>
+; SSE-NEXT: ret <2 x i64> [[TMP2]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
index d1f6c41e5c30ec..27996a7064c0d8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
@@ -12,13 +12,8 @@
define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
-; SSE2-NEXT: [[I0:%.*]] = load i8, ptr [[P0]], align 1
-; SSE2-NEXT: [[I1:%.*]] = load i8, ptr [[P1]], align 1
-; SSE2-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64
-; SSE2-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64
-; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> poison, i64 [[X0]], i32 0
-; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
+; SSE2-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
+; SSE2-NEXT: [[V1:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64>
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
index 829e4bab20ffaf..94870420f2bfc6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
@@ -12,13 +12,8 @@
define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
-; SSE2-NEXT: [[I0:%.*]] = load i8, ptr [[P0]], align 1
-; SSE2-NEXT: [[I1:%.*]] = load i8, ptr [[P1]], align 1
-; SSE2-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64
-; SSE2-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64
-; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
-; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
+; SSE2-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
+; SSE2-NEXT: [[V1:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64>
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
More information about the llvm-commits
mailing list