[llvm] [VectorCombine] Allow shuffling between vectors the same type but different element sizes (PR #121216)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 27 09:42:42 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-vectorizers

Author: hanbeom (ParkHanbum)

<details>
<summary>Changes</summary>


The `foldInsExtVectorToShuffle` function folds an extractelement/insertelement
pair into a single shuffle. However, it previously only handled the case
where the source and destination vectors have exactly the same type.

This commit also allows the fold when the two vectors have the same
element type but a different number of elements, by first emitting a
length-changing shuffle that resizes the source vector to the destination length.

Proof: https://alive2.llvm.org/ce/z/ELNLr7
Fixes https://github.com/llvm/llvm-project/issues/120772

---

Patch is 32.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121216.diff


5 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+42-7) 
- (added) llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll (+201) 
- (added) llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll (+186) 
- (added) llvm/test/Transforms/VectorCombine/X86/extract-insert.ll (+191) 
- (modified) llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll (+2-2) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index ecbc13d489eb37..d6e4e53cca7692 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3018,24 +3018,37 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
     return false;
 
   auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
-  if (!VecTy || SrcVec->getType() != VecTy)
+  auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
+  // The vectors may differ in element count, but not in element type.
+  if (!VecTy || !SrcVecTy ||
+      SrcVecTy->getElementType() != VecTy->getElementType())
     return false;
 
   unsigned NumElts = VecTy->getNumElements();
-  if (ExtIdx >= NumElts || InsIdx >= NumElts)
+  unsigned NumSrcElts = SrcVecTy->getNumElements();
+  if (InsIdx >= NumElts || NumElts == 1)
     return false;
 
   // Insertion into poison is a cheaper single operand shuffle.
   TargetTransformInfo::ShuffleKind SK;
   SmallVector<int> Mask(NumElts, PoisonMaskElem);
-  if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
+
+  bool NeedExpOrNarrow = NumSrcElts != NumElts;
+  bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
+  if (NeedDstSrcSwap) {
     SK = TargetTransformInfo::SK_PermuteSingleSrc;
-    Mask[InsIdx] = ExtIdx;
+    if (!NeedExpOrNarrow)
+      Mask[InsIdx] = ExtIdx;
+    else
+      Mask[InsIdx] = 0;
     std::swap(DstVec, SrcVec);
   } else {
     SK = TargetTransformInfo::SK_PermuteTwoSrc;
     std::iota(Mask.begin(), Mask.end(), 0);
-    Mask[InsIdx] = ExtIdx + NumElts;
+    if (!NeedExpOrNarrow)
+      Mask[InsIdx] = ExtIdx + NumElts;
+    else
+      Mask[InsIdx] = NumElts;
   }
 
   // Cost
@@ -3047,8 +3060,23 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
       TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
   InstructionCost OldCost = ExtCost + InsCost;
 
-  InstructionCost NewCost = TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0,
-                                               nullptr, {DstVec, SrcVec});
+  InstructionCost NewCost = 0;
+  SmallVector<int> ExtToVecMask;
+  if (!NeedExpOrNarrow) {
+    NewCost = TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr,
+                                 {DstVec, SrcVec});
+  } else {
+    // The length-changing shuffle always uses a mask whose first element is
+    // ExtIdx, so the extracted element always ends up in lane 0 of the
+    // resized vector.
+    ExtToVecMask.assign(NumElts, PoisonMaskElem);
+    ExtToVecMask[0] = ExtIdx;
+    // Add cost for expanding or narrowing
+    NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+                                 VecTy, ExtToVecMask, CostKind);
+    NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind);
+  }
+
   if (!Ext->hasOneUse())
     NewCost += ExtCost;
 
@@ -3059,6 +3087,13 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   if (OldCost < NewCost)
     return false;
 
+  if (NeedExpOrNarrow) {
+    if (!NeedDstSrcSwap)
+      SrcVec = Builder.CreateShuffleVector(SrcVec, ExtToVecMask);
+    else
+      DstVec = Builder.CreateShuffleVector(DstVec, ExtToVecMask);
+  }
+
   // Canonicalize undef param to RHS to help further folds.
   if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
     ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
new file mode 100644
index 00000000000000..fe303438f9588d
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
@@ -0,0 +1,201 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
+
+
+define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> poison, double %ext, i32 0
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> poison, double %ext, i32 1
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; SSE-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; SSE-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; SSE-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
+; AVX-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> poison, double %ext, i32 2
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; SSE-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; SSE-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; SSE-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
+; AVX-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> poison, double %ext, i32 3
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> poison, double %ext, i32 0
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> poison, double %ext, i32 1
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; SSE-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; SSE-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
+; AVX-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> poison, double %ext, i32 2
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; SSE-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; SSE-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; SSE-NEXT:    [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
+; AVX-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> poison, double %ext, i32 3
+  ret <4 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 0
+  %ins = insertelement <2 x double> poison, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 1
+  %ins = insertelement <2 x double> poison, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 2
+  %ins = insertelement <2 x double> poison, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 3
+  %ins = insertelement <2 x double> poison, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 0
+  %ins = insertelement <2 x double> poison, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 1
+  %ins = insertelement <2 x double> poison, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 2
+  %ins = insertelement <2 x double> poison, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 3
+  %ins = insertelement <2 x double> poison, double %ext, i32 1
+  ret <2 x double> %ins
+}
+
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
new file mode 100644
index 00000000000000..6051e6ff512fe7
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
+
+
+define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> undef, double %ext, i32 0
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> undef, double %ext, i32 1
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> undef, double %ext, i32 2
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 0
+  %ins = insertelement <4 x double> undef, double %ext, i32 3
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> undef, double %ext, i32 0
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+; SSE-NEXT:    ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; AVX-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; AVX-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
+; AVX-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> undef, double %ext, i32 1
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> undef, double %ext, i32 2
+  ret <4 x double> %ins
+}
+
+define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
+; CHECK-NEXT:    ret <4 x double> [[INS]]
+;
+  %ext = extractelement <2 x double> %b, i32 1
+  %ins = insertelement <4 x double> undef, double %ext, i32 3
+  ret <4 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 0
+  %ins = insertelement <2 x double> undef, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 1
+  %ins = insertelement <2 x double> undef, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 2
+  %ins = insertelement <2 x double> undef, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64(
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
+; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0
+; CHECK-NEXT:    ret <2 x double> [[INS]]
+;
+  %ext = extractelement <4 x double> %b, i32 3
+  %ins = insertelement <2 x double> undef, double %ext, i32 0
+  ret <2 x double> %ins
+}
+
+define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/121216


More information about the llvm-commits mailing list