[llvm] [VectorCombine] Allow shuffling between vectors the same type but different element sizes (PR #121216)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 21 09:55:31 PST 2025


https://github.com/ParkHanbum updated https://github.com/llvm/llvm-project/pull/121216

>From b9e10359920086ed24ec725b4eab78714b72a9f9 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 27 Dec 2024 17:49:02 +0900
Subject: [PATCH 1/6] add new tests

---
 .../X86/extract-insert-poison.ll              | 184 ++++++++++++++++++
 .../VectorCombine/X86/extract-insert-undef.ll | 184 ++++++++++++++++++
 .../VectorCombine/X86/extract-insert.ll       | 183 +++++++++++++++++
 3 files changed, 551 insertions(+)
 create mode 100644 llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
 create mode 100644 llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
 create mode 100644 llvm/test/Transforms/VectorCombine/X86/extract-insert.ll

diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
new file mode 100644
index 00000000000000..f9cfe2f5166879
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
@@ -0,0 +1,184 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
+
+
+define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> poison, double %ext, i32 0
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> poison, double %ext, i32 1
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> poison, double %ext, i32 2
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> poison, double %ext, i32 3
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> poison, double %ext, i32 0
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> poison, double %ext, i32 1
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> poison, double %ext, i32 2
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> poison, double %ext, i32 3
+  ret <4 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 0
+  %ins = insertelement <2 x double> poison, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 1
+  %ins = insertelement <2 x double> poison, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 2
+  %ins = insertelement <2 x double> poison, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 3
+  %ins = insertelement <2 x double> poison, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 0
+  %ins = insertelement <2 x double> poison, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 1
+  %ins = insertelement <2 x double> poison, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 2
+  %ins = insertelement <2 x double> poison, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 3
+  %ins = insertelement <2 x double> poison, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
+; SSE: {{.*}}
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
new file mode 100644
index 00000000000000..c47c196bb92ba2
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
@@ -0,0 +1,184 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
+
+
+define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> undef, double %ext, i32 0
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> undef, double %ext, i32 1
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> undef, double %ext, i32 2
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> undef, double %ext, i32 3
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> undef, double %ext, i32 0
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> undef, double %ext, i32 1
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> undef, double %ext, i32 2
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> undef, double %ext, i32 3
+  ret <4 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 0
+  %ins = insertelement <2 x double> undef, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 1
+  %ins = insertelement <2 x double> undef, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 2
+  %ins = insertelement <2 x double> undef, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 3
+  %ins = insertelement <2 x double> undef, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 0
+  %ins = insertelement <2 x double> undef, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 1
+  %ins = insertelement <2 x double> undef, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 2
+  %ins = insertelement <2 x double> undef, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 3
+  %ins = insertelement <2 x double> undef, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
+; SSE: {{.*}}
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
new file mode 100644
index 00000000000000..66dc9aac6b6786
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
@@ -0,0 +1,183 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
+
+
+define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> %a, double %ext, i32 0
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> %a, double %ext, i32 1
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 2
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> %a, double %ext, i32 2
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 3
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> %a, double %ext, i32 3
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> %a, double %ext, i32 0
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> %a, double %ext, i32 1
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 2
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> %a, double %ext, i32 2
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 3
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> %a, double %ext, i32 3
+  ret <4 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 0
+  %ins = insertelement <2 x double> %a, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 1
+  %ins = insertelement <2 x double> %a, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 2
+  %ins = insertelement <2 x double> %a, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 3
+  %ins = insertelement <2 x double> %a, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 0
+  %ins = insertelement <2 x double> %a, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 1
+  %ins = insertelement <2 x double> %a, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 2
+  %ins = insertelement <2 x double> %a, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 3
+  %ins = insertelement <2 x double> %a, double %ext, i32 1
+  ret <2 x double> %ins
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
+; SSE: {{.*}}

>From 7f056fc1c28c29eb0e7123a321a24a79570a2600 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 27 Dec 2024 17:49:24 +0900
Subject: [PATCH 2/6] [VectorCombine] Allow shuffling between vectors the same
 type but different element sizes

`foldInsExtVectorToShuffle` function combines the extract/insert
of a vector into a vector through a shuffle. However, we only
supported coupling between vectors of the same size.

This commit allows combining extract/insert for vectors of
the same type but with different sizes by converting
the length of the vectors.

Proof: https://alive2.llvm.org/ce/z/EWFmfL
Fixed #120772
---
 .../Transforms/Vectorize/VectorCombine.cpp    | 54 +++++++++++++----
 .../X86/extract-insert-poison.ll              | 58 +++++++++++--------
 .../VectorCombine/X86/extract-insert-undef.ll | 28 ++++-----
 .../VectorCombine/X86/extract-insert.ll       | 54 +++++++++--------
 .../VectorCombine/X86/load-inseltpoison.ll    |  4 +-
 5 files changed, 127 insertions(+), 71 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index ea53a1acebd1df..03878989967325 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3068,24 +3068,37 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
     return false;
 
   auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
-  if (!VecTy || SrcVec->getType() != VecTy)
+  auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
+  // We can try combining vectors with different element sizes.
+  if (!VecTy || !SrcVecTy ||
+      SrcVecTy->getElementType() != VecTy->getElementType())
     return false;
 
   unsigned NumElts = VecTy->getNumElements();
-  if (ExtIdx >= NumElts || InsIdx >= NumElts)
+  unsigned NumSrcElts = SrcVecTy->getNumElements();
+  if (InsIdx >= NumElts || NumElts == 1)
     return false;
 
   // Insertion into poison is a cheaper single operand shuffle.
   TargetTransformInfo::ShuffleKind SK;
   SmallVector<int> Mask(NumElts, PoisonMaskElem);
-  if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
+
+  bool NeedExpOrNarrow = NumSrcElts != NumElts;
+  bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
+  if (NeedDstSrcSwap) {
     SK = TargetTransformInfo::SK_PermuteSingleSrc;
-    Mask[InsIdx] = ExtIdx;
+    if (!NeedExpOrNarrow)
+      Mask[InsIdx] = ExtIdx;
+    else
+      Mask[InsIdx] = 0;
     std::swap(DstVec, SrcVec);
   } else {
     SK = TargetTransformInfo::SK_PermuteTwoSrc;
     std::iota(Mask.begin(), Mask.end(), 0);
-    Mask[InsIdx] = ExtIdx + NumElts;
+    if (!NeedExpOrNarrow)
+      Mask[InsIdx] = ExtIdx + NumElts;
+    else
+      Mask[InsIdx] = NumElts;
   }
 
   // Cost
@@ -3097,12 +3110,26 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
       TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
   InstructionCost OldCost = ExtCost + InsCost;
 
-  // Ignore 'free' identity insertion shuffle.
-  // TODO: getShuffleCost should return TCC_Free for Identity shuffles.
   InstructionCost NewCost = 0;
-  if (!ShuffleVectorInst::isIdentityMask(Mask, NumElts))
-    NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr,
-                                  {DstVec, SrcVec});
+  SmallVector<int> ExtToVecMask;
+  if (!NeedExpOrNarrow) {
+    // Ignore 'free' identity insertion shuffle.
+    // TODO: getShuffleCost should return TCC_Free for Identity shuffles.
+    if (!ShuffleVectorInst::isIdentityMask(Mask, NumElts))
+      NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr,
+                                    {DstVec, SrcVec});
+  } else {
+    // When creating length-changing-vector, always create with a Mask whose
+    // first element has an ExtIdx, so that the first element of the vector
+    // being created is always the target to be extracted.
+    ExtToVecMask.assign(NumElts, PoisonMaskElem);
+    ExtToVecMask[0] = ExtIdx;
+    // Add cost for expanding or narrowing
+    NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+                                 VecTy, ExtToVecMask, CostKind);
+    NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind);
+  }
+
   if (!Ext->hasOneUse())
     NewCost += ExtCost;
 
@@ -3113,6 +3140,13 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   if (OldCost < NewCost)
     return false;
 
+  if (NeedExpOrNarrow) {
+    if (!NeedDstSrcSwap)
+      SrcVec = Builder.CreateShuffleVector(SrcVec, ExtToVecMask);
+    else
+      DstVec = Builder.CreateShuffleVector(DstVec, ExtToVecMask);
+  }
+
   // Canonicalize undef param to RHS to help further folds.
   if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
     ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
index f9cfe2f5166879..dc0ca3dacbb268 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
@@ -26,10 +26,15 @@ define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
 }
 
 define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
-; CHECK-NEXT:    ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; SSE-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; SSE-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
+; AVX-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 0
   %ins = insertelement <4 x double> poison, double %ext, i32 2
@@ -37,10 +42,15 @@ define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
 }
 
 define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
-; CHECK-NEXT:    ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; SSE-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; SSE-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
+; AVX-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 0
   %ins = insertelement <4 x double> poison, double %ext, i32 3
@@ -60,8 +70,8 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 
 define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
 ; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
 ; CHECK-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
@@ -70,10 +80,15 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 }
 
 define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
-; CHECK-NEXT:    ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; SSE-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; SSE-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
+; AVX-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
   %ins = insertelement <4 x double> poison, double %ext, i32 2
@@ -82,8 +97,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 
 define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
 ; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
 ; CHECK-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
@@ -148,8 +163,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 1
@@ -170,8 +185,8 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 3
@@ -179,6 +194,3 @@ define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)
   ret <2 x double> %ins
 }
 
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX: {{.*}}
-; SSE: {{.*}}
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
index c47c196bb92ba2..6051e6ff512fe7 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
@@ -59,10 +59,15 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 }
 
 define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
-; CHECK-NEXT:    ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; AVX-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; AVX-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
+; AVX-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
   %ins = insertelement <4 x double> undef, double %ext, i32 1
@@ -82,8 +87,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 
 define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
 ; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
 ; CHECK-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
@@ -148,8 +153,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 2, i32 0>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 1
@@ -170,8 +175,8 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 2, i32 0>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 3
@@ -179,6 +184,3 @@ define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)
   ret <2 x double> %ins
 }
 
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX: {{.*}}
-; SSE: {{.*}}
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
index 66dc9aac6b6786..31c4834ff65847 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
@@ -48,10 +48,15 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
 }
 
 define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0
-; CHECK-NEXT:    ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; AVX-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; AVX-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0
+; AVX-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
   %ins = insertelement <4 x double> %a, double %ext, i32 0
@@ -59,10 +64,15 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 }
 
 define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1
-; CHECK-NEXT:    ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; AVX-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; AVX-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1
+; AVX-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
   %ins = insertelement <4 x double> %a, double %ext, i32 1
@@ -71,8 +81,8 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 
 define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
 ; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
 ; CHECK-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
@@ -82,8 +92,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 
 define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
 ; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
 ; CHECK-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
@@ -104,8 +114,8 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 1
@@ -126,8 +136,8 @@ define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 3
@@ -148,8 +158,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 1
@@ -170,14 +180,12 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 3
   %ins = insertelement <2 x double> %a, double %ext, i32 1
   ret <2 x double> %ins
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX: {{.*}}
-; SSE: {{.*}}
+
diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
index 2db1e21b3e95a7..3b2aa5e6603b07 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
@@ -589,8 +589,8 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
 ; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use(
 ; CHECK-NEXT:    [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4
-; CHECK-NEXT:    [[S:%.*]] = extractelement <1 x i32> [[L]], i32 0
-; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x i32> poison, i32 [[S]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x i32> [[L]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    ret <8 x i32> [[R]]
 ;
   %l = load <1 x i32>, ptr %p, align 4

>From 687d1c921fb6dc08ec902a694845587252d29231 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 7 Jan 2025 01:40:14 +0900
Subject: [PATCH 3/6] Add the omitted Dst to the variable name

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 03878989967325..89477246e0ec9c 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3067,23 +3067,23 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
                          m_ConstantInt(InsIdx))))
     return false;
 
-  auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
+  auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
   auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
   // We can try combining vectors with different element sizes.
-  if (!VecTy || !SrcVecTy ||
-      SrcVecTy->getElementType() != VecTy->getElementType())
+  if (!DstVecTy || !SrcVecTy ||
+      SrcVecTy->getElementType() != DstVecTy->getElementType())
     return false;
 
-  unsigned NumElts = VecTy->getNumElements();
+  unsigned NumDstElts = DstVecTy->getNumElements();
   unsigned NumSrcElts = SrcVecTy->getNumElements();
-  if (InsIdx >= NumElts || NumElts == 1)
+  if (InsIdx >= NumDstElts || NumDstElts == 1)
     return false;
 
   // Insertion into poison is a cheaper single operand shuffle.
   TargetTransformInfo::ShuffleKind SK;
-  SmallVector<int> Mask(NumElts, PoisonMaskElem);
+  SmallVector<int> Mask(NumDstElts, PoisonMaskElem);
 
-  bool NeedExpOrNarrow = NumSrcElts != NumElts;
+  bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
   bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
   if (NeedDstSrcSwap) {
     SK = TargetTransformInfo::SK_PermuteSingleSrc;
@@ -3096,18 +3096,18 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
     SK = TargetTransformInfo::SK_PermuteTwoSrc;
     std::iota(Mask.begin(), Mask.end(), 0);
     if (!NeedExpOrNarrow)
-      Mask[InsIdx] = ExtIdx + NumElts;
+      Mask[InsIdx] = ExtIdx + NumDstElts;
     else
-      Mask[InsIdx] = NumElts;
+      Mask[InsIdx] = NumDstElts;
   }
 
   // Cost
   auto *Ins = cast<InsertElementInst>(&I);
   auto *Ext = cast<ExtractElementInst>(I.getOperand(1));
   InstructionCost InsCost =
-      TTI.getVectorInstrCost(*Ins, VecTy, CostKind, InsIdx);
+      TTI.getVectorInstrCost(*Ins, DstVecTy, CostKind, InsIdx);
   InstructionCost ExtCost =
-      TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
+      TTI.getVectorInstrCost(*Ext, DstVecTy, CostKind, ExtIdx);
   InstructionCost OldCost = ExtCost + InsCost;
 
   InstructionCost NewCost = 0;
@@ -3115,19 +3115,19 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   if (!NeedExpOrNarrow) {
     // Ignore 'free' identity insertion shuffle.
     // TODO: getShuffleCost should return TCC_Free for Identity shuffles.
-    if (!ShuffleVectorInst::isIdentityMask(Mask, NumElts))
-      NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr,
+    if (!ShuffleVectorInst::isIdentityMask(Mask, NumDstElts))
+      NewCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr,
                                     {DstVec, SrcVec});
   } else {
     // When creating length-changing-vector, always create with a Mask whose
     // first element has an ExtIdx, so that the first element of the vector
     // being created is always the target to be extracted.
-    ExtToVecMask.assign(NumElts, PoisonMaskElem);
+    ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
     ExtToVecMask[0] = ExtIdx;
     // Add cost for expanding or narrowing
     NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
-                                 VecTy, ExtToVecMask, CostKind);
-    NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind);
+                                 DstVecTy, ExtToVecMask, CostKind);
+    NewCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind);
   }
 
   if (!Ext->hasOneUse())
@@ -3149,7 +3149,7 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
 
   // Canonicalize undef param to RHS to help further folds.
   if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
-    ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
+    ShuffleVectorInst::commuteShuffleMask(Mask, NumDstElts);
     std::swap(DstVec, SrcVec);
   }
 

>From 6a04dfe8005ad20e6ca750928964d579e4c694ec Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 7 Jan 2025 01:43:46 +0900
Subject: [PATCH 4/6] Returns false early if condition 'ExtIdx>=NumSrcElts' is
 met

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 89477246e0ec9c..12555588ed47f3 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3076,7 +3076,7 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
 
   unsigned NumDstElts = DstVecTy->getNumElements();
   unsigned NumSrcElts = SrcVecTy->getNumElements();
-  if (InsIdx >= NumDstElts || NumDstElts == 1)
+  if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
     return false;
 
   // Insertion into poison is a cheaper single operand shuffle.

>From 824c68ec9d359bc358534b61178ef4040b2c19f4 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sat, 18 Jan 2025 21:37:58 +0900
Subject: [PATCH 5/6] remove test cases for undef

---
 .../VectorCombine/X86/extract-insert-undef.ll | 186 ------------------
 1 file changed, 186 deletions(-)
 delete mode 100644 llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll

diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
deleted file mode 100644
index 6051e6ff512fe7..00000000000000
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
+++ /dev/null
@@ -1,186 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
-
-
-define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0
-; CHECK-NEXT:    ret <4 x double> [[INS]]
-;
-  %ext = extractelement <2 x double> %b, i32 0
-  %ins = insertelement <4 x double> undef, double %ext, i32 0
-  ret <4 x double> %ins
-}
-
-define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
-; CHECK-NEXT:    ret <4 x double> [[INS]]
-;
-  %ext = extractelement <2 x double> %b, i32 0
-  %ins = insertelement <4 x double> undef, double %ext, i32 1
-  ret <4 x double> %ins
-}
-
-define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2
-; CHECK-NEXT:    ret <4 x double> [[INS]]
-;
-  %ext = extractelement <2 x double> %b, i32 0
-  %ins = insertelement <4 x double> undef, double %ext, i32 2
-  ret <4 x double> %ins
-}
-
-define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3
-; CHECK-NEXT:    ret <4 x double> [[INS]]
-;
-  %ext = extractelement <2 x double> %b, i32 0
-  %ins = insertelement <4 x double> undef, double %ext, i32 3
-  ret <4 x double> %ins
-}
-
-define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0
-; CHECK-NEXT:    ret <4 x double> [[INS]]
-;
-  %ext = extractelement <2 x double> %b, i32 1
-  %ins = insertelement <4 x double> undef, double %ext, i32 0
-  ret <4 x double> %ins
-}
-
-define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64(
-; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
-; SSE-NEXT:    ret <4 x double> [[INS]]
-;
-; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64(
-; AVX-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; AVX-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
-; AVX-NEXT:    ret <4 x double> [[INS]]
-;
-  %ext = extractelement <2 x double> %b, i32 1
-  %ins = insertelement <4 x double> undef, double %ext, i32 1
-  ret <4 x double> %ins
-}
-
-define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2
-; CHECK-NEXT:    ret <4 x double> [[INS]]
-;
-  %ext = extractelement <2 x double> %b, i32 1
-  %ins = insertelement <4 x double> undef, double %ext, i32 2
-  ret <4 x double> %ins
-}
-
-define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
-; CHECK-NEXT:    ret <4 x double> [[INS]]
-;
-  %ext = extractelement <2 x double> %b, i32 1
-  %ins = insertelement <4 x double> undef, double %ext, i32 3
-  ret <4 x double> %ins
-}
-
-define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
-; CHECK-NEXT:    ret <2 x double> [[INS]]
-;
-  %ext = extractelement <4 x double> %b, i32 0
-  %ins = insertelement <2 x double> undef, double %ext, i32 0
-  ret <2 x double> %ins
-}
-
-define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
-; CHECK-NEXT:    ret <2 x double> [[INS]]
-;
-  %ext = extractelement <4 x double> %b, i32 1
-  %ins = insertelement <2 x double> undef, double %ext, i32 0
-  ret <2 x double> %ins
-}
-
-define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
-; CHECK-NEXT:    ret <2 x double> [[INS]]
-;
-  %ext = extractelement <4 x double> %b, i32 2
-  %ins = insertelement <2 x double> undef, double %ext, i32 0
-  ret <2 x double> %ins
-}
-
-define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
-; CHECK-NEXT:    ret <2 x double> [[INS]]
-;
-  %ext = extractelement <4 x double> %b, i32 3
-  %ins = insertelement <2 x double> undef, double %ext, i32 0
-  ret <2 x double> %ins
-}
-
-define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
-; CHECK-NEXT:    ret <2 x double> [[INS]]
-;
-  %ext = extractelement <4 x double> %b, i32 0
-  %ins = insertelement <2 x double> undef, double %ext, i32 1
-  ret <2 x double> %ins
-}
-
-define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 2, i32 0>
-; CHECK-NEXT:    ret <2 x double> [[INS]]
-;
-  %ext = extractelement <4 x double> %b, i32 1
-  %ins = insertelement <2 x double> undef, double %ext, i32 1
-  ret <2 x double> %ins
-}
-
-define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
-; CHECK-NEXT:    ret <2 x double> [[INS]]
-;
-  %ext = extractelement <4 x double> %b, i32 2
-  %ins = insertelement <2 x double> undef, double %ext, i32 1
-  ret <2 x double> %ins
-}
-
-define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
-; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 2, i32 0>
-; CHECK-NEXT:    ret <2 x double> [[INS]]
-;
-  %ext = extractelement <4 x double> %b, i32 3
-  %ins = insertelement <2 x double> undef, double %ext, i32 1
-  ret <2 x double> %ins
-}
-

>From e6525d911765e444dd16c9219d70afb72ba5e474 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 21 Jan 2025 04:01:20 +0900
Subject: [PATCH 6/6] keep extidx if it within dest vector bound

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 18 +++++----
 .../X86/extract-insert-poison.ll              | 38 +++++++++----------
 .../VectorCombine/X86/extract-insert.ll       | 33 +++++++---------
 3 files changed, 44 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 12555588ed47f3..1e1ecba84fea0c 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3084,21 +3084,22 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   SmallVector<int> Mask(NumDstElts, PoisonMaskElem);
 
   bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
+  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
   bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
   if (NeedDstSrcSwap) {
     SK = TargetTransformInfo::SK_PermuteSingleSrc;
-    if (!NeedExpOrNarrow)
-      Mask[InsIdx] = ExtIdx;
-    else
+    if (!IsExtIdxInBounds && NeedExpOrNarrow)
       Mask[InsIdx] = 0;
+    else
+      Mask[InsIdx] = ExtIdx;
     std::swap(DstVec, SrcVec);
   } else {
     SK = TargetTransformInfo::SK_PermuteTwoSrc;
     std::iota(Mask.begin(), Mask.end(), 0);
-    if (!NeedExpOrNarrow)
-      Mask[InsIdx] = ExtIdx + NumDstElts;
-    else
+    if (!IsExtIdxInBounds && NeedExpOrNarrow)
       Mask[InsIdx] = NumDstElts;
+    else
+      Mask[InsIdx] = ExtIdx + NumDstElts;
   }
 
   // Cost
@@ -3123,7 +3124,10 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
     // first element has an ExtIdx, so that the first element of the vector
     // being created is always the target to be extracted.
     ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
-    ExtToVecMask[0] = ExtIdx;
+    if (IsExtIdxInBounds)
+      ExtToVecMask[ExtIdx] = ExtIdx;
+    else
+      ExtToVecMask[0] = ExtIdx;
     // Add cost for expanding or narrowing
     NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                  DstVecTy, ExtToVecMask, CostKind);
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
index dc0ca3dacbb268..0a9386c0b8db1d 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
@@ -58,10 +58,15 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
 }
 
 define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
-; CHECK-NEXT:    ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; SSE-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; AVX-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; AVX-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
+; AVX-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
   %ins = insertelement <4 x double> poison, double %ext, i32 0
@@ -70,8 +75,8 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 
 define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
 ; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
 ; CHECK-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
@@ -80,15 +85,10 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 }
 
 define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64(
-; SSE-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; SSE-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
-; SSE-NEXT:    ret <4 x double> [[INS]]
-;
-; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
-; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; AVX-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
-; AVX-NEXT:    ret <4 x double> [[INS]]
+; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
+; CHECK-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
   %ins = insertelement <4 x double> poison, double %ext, i32 2
@@ -97,8 +97,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 
 define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
 ; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 1>
 ; CHECK-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
@@ -163,8 +163,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 1>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
index 31c4834ff65847..41200e517f5ed2 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll
@@ -49,8 +49,8 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
 
 define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
 ; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64(
-; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; SSE-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 5, i32 1, i32 2, i32 3>
 ; SSE-NEXT:    ret <4 x double> [[INS]]
 ;
 ; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64(
@@ -64,15 +64,10 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 }
 
 define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64(
-; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 4, i32 2, i32 3>
-; SSE-NEXT:    ret <4 x double> [[INS]]
-;
-; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64(
-; AVX-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
-; AVX-NEXT:    [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1
-; AVX-NEXT:    ret <4 x double> [[INS]]
+; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
   %ins = insertelement <4 x double> %a, double %ext, i32 1
@@ -81,8 +76,8 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 
 define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
 ; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 5, i32 3>
 ; CHECK-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
@@ -92,8 +87,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 
 define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
 ; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
 ; CHECK-NEXT:    ret <4 x double> [[INS]]
 ;
   %ext = extractelement <2 x double> %b, i32 1
@@ -114,8 +109,8 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 3, i32 1>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 1
@@ -158,8 +153,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
 
 define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    ret <2 x double> [[INS]]
 ;
   %ext = extractelement <4 x double> %b, i32 1



More information about the llvm-commits mailing list