[llvm] [VectorCombine] Allow shuffling between vectors the same type but different element sizes (PR #121216)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 27 09:42:10 PST 2024
https://github.com/ParkHanbum created https://github.com/llvm/llvm-project/pull/121216
`foldInsExtVectorToShuffle` function combines the extract/insert
of a vector into a vector through a shuffle. However, we only
supported coupling between vectors of the same size.
This commit allows combining extract/insert for vectors of
the same type but with different sizes by converting
the length of the vectors.
Proof: https://alive2.llvm.org/ce/z/ELNLr7
Fixed https://github.com/llvm/llvm-project/issues/120772
>From 7e34721e1d96320cf6947d37979b91bfbf4f289b Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 27 Dec 2024 17:49:02 +0900
Subject: [PATCH 1/2] add new tests
---
.../X86/extract-insert-poison.ll | 184 ++++++++++++++++++
.../VectorCombine/X86/extract-insert-undef.ll | 184 ++++++++++++++++++
.../VectorCombine/X86/extract-insert.ll | 183 +++++++++++++++++
3 files changed, 551 insertions(+)
create mode 100644 llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
create mode 100644 llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
create mode 100644 llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
new file mode 100644
index 00000000000000..f9cfe2f5166879
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
@@ -0,0 +1,184 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
+
+
+define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> poison, double %ext, i32 0
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> poison, double %ext, i32 1
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> poison, double %ext, i32 2
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> poison, double %ext, i32 3
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> poison, double %ext, i32 0
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> poison, double %ext, i32 1
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> poison, double %ext, i32 2
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> poison, double %ext, i32 3
+ ret <4 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 0
+ %ins = insertelement <2 x double> poison, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 1
+ %ins = insertelement <2 x double> poison, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 2
+ %ins = insertelement <2 x double> poison, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 3
+ %ins = insertelement <2 x double> poison, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 0
+ %ins = insertelement <2 x double> poison, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 1
+ %ins = insertelement <2 x double> poison, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 2
+ %ins = insertelement <2 x double> poison, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 3
+ %ins = insertelement <2 x double> poison, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
+; SSE: {{.*}}
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
new file mode 100644
index 00000000000000..c47c196bb92ba2
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
@@ -0,0 +1,184 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
+
+
+define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> undef, double %ext, i32 0
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> undef, double %ext, i32 1
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> undef, double %ext, i32 2
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> undef, double %ext, i32 3
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> undef, double %ext, i32 0
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> undef, double %ext, i32 1
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> undef, double %ext, i32 2
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> undef, double %ext, i32 3
+ ret <4 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 0
+ %ins = insertelement <2 x double> undef, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 1
+ %ins = insertelement <2 x double> undef, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 2
+ %ins = insertelement <2 x double> undef, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 3
+ %ins = insertelement <2 x double> undef, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 0
+ %ins = insertelement <2 x double> undef, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 1
+ %ins = insertelement <2 x double> undef, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 2
+ %ins = insertelement <2 x double> undef, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 3
+ %ins = insertelement <2 x double> undef, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
+; SSE: {{.*}}
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
new file mode 100644
index 00000000000000..66dc9aac6b6786
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
@@ -0,0 +1,183 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
+
+
+define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> %a, double %ext, i32 0
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> %a, double %ext, i32 1
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 2
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> %a, double %ext, i32 2
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 3
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 0
+ %ins = insertelement <4 x double> %a, double %ext, i32 3
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> %a, double %ext, i32 0
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> %a, double %ext, i32 1
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 2
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> %a, double %ext, i32 2
+ ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 3
+; CHECK-NEXT: ret <4 x double> [[INS]]
+;
+ %ext = extractelement <2 x double> %b, i32 1
+ %ins = insertelement <4 x double> %a, double %ext, i32 3
+ ret <4 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 0
+ %ins = insertelement <2 x double> %a, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 1
+ %ins = insertelement <2 x double> %a, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 2
+ %ins = insertelement <2 x double> %a, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 3
+ %ins = insertelement <2 x double> %a, double %ext, i32 0
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 0
+ %ins = insertelement <2 x double> %a, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 1
+ %ins = insertelement <2 x double> %a, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 2
+ %ins = insertelement <2 x double> %a, double %ext, i32 1
+ ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT: ret <2 x double> [[INS]]
+;
+ %ext = extractelement <4 x double> %b, i32 3
+ %ins = insertelement <2 x double> %a, double %ext, i32 1
+ ret <2 x double> %ins
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
+; SSE: {{.*}}
>From bcc7fb04d0199daa71050a51037e3897dbebdf80 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 27 Dec 2024 17:49:24 +0900
Subject: [PATCH 2/2] [VectorCombine] Allow shuffling between vectors the same
type but different element sizes
`foldInsExtVectorToShuffle` function combines the extract/insert
of a vector into a vector through a shuffle. However, we only
supported coupling between vectors of the same size.
This commit allows combining extract/insert for vectors of
the same type but with different sizes by converting
the length of the vectors.
Proof: https://alive2.llvm.org/ce/z/ELNLr7
Fixed #120772
---
.../Transforms/Vectorize/VectorCombine.cpp | 49 ++++++++++++--
.../X86/extract-insert-poison.ll | 67 ++++++++++++-------
.../VectorCombine/X86/extract-insert-undef.ll | 28 ++++----
.../VectorCombine/X86/extract-insert.ll | 54 ++++++++-------
.../VectorCombine/X86/load-inseltpoison.ll | 4 +-
5 files changed, 132 insertions(+), 70 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index ecbc13d489eb37..d6e4e53cca7692 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3018,24 +3018,37 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
return false;
auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
- if (!VecTy || SrcVec->getType() != VecTy)
+ auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
+ // We can try combining vectors with different element sizes.
+ if (!VecTy || !SrcVecTy ||
+ SrcVecTy->getElementType() != VecTy->getElementType())
return false;
unsigned NumElts = VecTy->getNumElements();
- if (ExtIdx >= NumElts || InsIdx >= NumElts)
+ unsigned NumSrcElts = SrcVecTy->getNumElements();
+ if (InsIdx >= NumElts || NumElts == 1)
return false;
// Insertion into poison is a cheaper single operand shuffle.
TargetTransformInfo::ShuffleKind SK;
SmallVector<int> Mask(NumElts, PoisonMaskElem);
- if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
+
+ bool NeedExpOrNarrow = NumSrcElts != NumElts;
+ bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
+ if (NeedDstSrcSwap) {
SK = TargetTransformInfo::SK_PermuteSingleSrc;
- Mask[InsIdx] = ExtIdx;
+ if (!NeedExpOrNarrow)
+ Mask[InsIdx] = ExtIdx;
+ else
+ Mask[InsIdx] = 0;
std::swap(DstVec, SrcVec);
} else {
SK = TargetTransformInfo::SK_PermuteTwoSrc;
std::iota(Mask.begin(), Mask.end(), 0);
- Mask[InsIdx] = ExtIdx + NumElts;
+ if (!NeedExpOrNarrow)
+ Mask[InsIdx] = ExtIdx + NumElts;
+ else
+ Mask[InsIdx] = NumElts;
}
// Cost
@@ -3047,8 +3060,23 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
InstructionCost OldCost = ExtCost + InsCost;
- InstructionCost NewCost = TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0,
- nullptr, {DstVec, SrcVec});
+ InstructionCost NewCost = 0;
+ SmallVector<int> ExtToVecMask;
+ if (!NeedExpOrNarrow) {
+ NewCost = TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr,
+ {DstVec, SrcVec});
+ } else {
+ // When creating length-changing-vector, always create with a Mask whose
+ // first element has an ExtIdx, so that the first element of the vector
+ // being created is always the target to be extracted.
+ ExtToVecMask.assign(NumElts, PoisonMaskElem);
+ ExtToVecMask[0] = ExtIdx;
+ // Add cost for expanding or narrowing
+ NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+ VecTy, ExtToVecMask, CostKind);
+ NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind);
+ }
+
if (!Ext->hasOneUse())
NewCost += ExtCost;
@@ -3059,6 +3087,13 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
if (OldCost < NewCost)
return false;
+ if (NeedExpOrNarrow) {
+ if (!NeedDstSrcSwap)
+ SrcVec = Builder.CreateShuffleVector(SrcVec, ExtToVecMask);
+ else
+ DstVec = Builder.CreateShuffleVector(DstVec, ExtToVecMask);
+ }
+
// Canonicalize undef param to RHS to help further folds.
if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
index f9cfe2f5166879..fe303438f9588d 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
@@ -26,10 +26,15 @@ define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
}
define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
-; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
-; CHECK-NEXT: ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
+; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 0
%ins = insertelement <4 x double> poison, double %ext, i32 2
@@ -37,10 +42,15 @@ define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
}
define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
-; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
-; CHECK-NEXT: ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
+; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 0
%ins = insertelement <4 x double> poison, double %ext, i32 3
@@ -60,8 +70,8 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
@@ -70,10 +80,15 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
}
define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
-; CHECK-NEXT: ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
+; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
%ins = insertelement <4 x double> poison, double %ext, i32 2
@@ -81,10 +96,15 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
}
define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
-; CHECK-NEXT: ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
+; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
%ins = insertelement <4 x double> poison, double %ext, i32 3
@@ -148,8 +168,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 1
@@ -170,8 +190,8 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 3
@@ -179,6 +199,3 @@ define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)
ret <2 x double> %ins
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX: {{.*}}
-; SSE: {{.*}}
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
index c47c196bb92ba2..6051e6ff512fe7 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
@@ -59,10 +59,15 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
}
define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
-; CHECK-NEXT: ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
+; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
%ins = insertelement <4 x double> undef, double %ext, i32 1
@@ -82,8 +87,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
; CHECK-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
@@ -148,8 +153,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 2, i32 0>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 1
@@ -170,8 +175,8 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 2, i32 0>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 3
@@ -179,6 +184,3 @@ define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)
ret <2 x double> %ins
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX: {{.*}}
-; SSE: {{.*}}
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
index 66dc9aac6b6786..31c4834ff65847 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
@@ -48,10 +48,15 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
}
define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0
-; CHECK-NEXT: ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0
+; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
%ins = insertelement <4 x double> %a, double %ext, i32 0
@@ -59,10 +64,15 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
}
define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1
-; CHECK-NEXT: ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1
+; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
%ins = insertelement <4 x double> %a, double %ext, i32 1
@@ -71,8 +81,8 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 2
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
@@ -82,8 +92,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 3
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; CHECK-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
@@ -104,8 +114,8 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 1
@@ -126,8 +136,8 @@ define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 3
@@ -148,8 +158,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 1
@@ -170,14 +180,12 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 3
%ins = insertelement <2 x double> %a, double %ext, i32 1
ret <2 x double> %ins
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX: {{.*}}
-; SSE: {{.*}}
+
diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
index 937d4043adc0c4..b7a54c6d53f880 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
@@ -592,8 +592,8 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use(
; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4
; CHECK-NEXT: store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4
-; CHECK-NEXT: [[S:%.*]] = extractelement <1 x i32> [[L]], i32 0
-; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> poison, i32 [[S]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i32> [[L]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: ret <8 x i32> [[R]]
;
%l = load <1 x i32>, ptr %p, align 4
More information about the llvm-commits
mailing list