[llvm] [VectorCombine] Expand `vector_insert` into shufflevector for earlier cost optimizations (#145512) (PR #146479)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 1 01:52:46 PDT 2025
https://github.com/laurenmchin updated https://github.com/llvm/llvm-project/pull/146479
From 27569450c58c82fd717007da9510944993660a04 Mon Sep 17 00:00:00 2001
From: Lauren <lchin at berkeley.edu>
Date: Tue, 1 Jul 2025 02:55:20 -0400
Subject: [PATCH 1/2] [VectorCombine] Expand `vector_insert` into shufflevector
for earlier cost optimizations (#145512)
Move the folding logic from `InstCombineCalls` to `VectorCombine` so that `vector_insert` intrinsics are expanded into shufflevector instructions before the cost-based shuffle optimizations run.
Only fixed-width (non-scalable) vectors are canonicalized; scalable `vector_insert` calls are left unchanged.
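For illustration (the value name %widen is hypothetical; the exact output is what the updated tests below check), a fixed-width insert such as

  %result = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4)

is expanded into a widening shuffle of the subvector followed by a two-source shuffle that selects the inserted lanes:

  %widen = shufflevector <4 x i32> %subvec, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
  %result = shufflevector <8 x i32> %vec, <8 x i32> %widen, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>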
---
.../InstCombine/InstCombineCalls.cpp | 46 ------------
.../Transforms/Vectorize/VectorCombine.cpp | 71 +++++++++++++++++++
.../VectorCombine/fold-vector-insert.ll | 71 +++++++++++++++++++
3 files changed, 142 insertions(+), 46 deletions(-)
create mode 100644 llvm/test/Transforms/VectorCombine/fold-vector-insert.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 5b398d3b75f59..df29024a86f67 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3462,52 +3462,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
- case Intrinsic::vector_insert: {
- Value *Vec = II->getArgOperand(0);
- Value *SubVec = II->getArgOperand(1);
- Value *Idx = II->getArgOperand(2);
- auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
- auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
- auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
-
- // Only canonicalize if the destination vector, Vec, and SubVec are all
- // fixed vectors.
- if (DstTy && VecTy && SubVecTy) {
- unsigned DstNumElts = DstTy->getNumElements();
- unsigned VecNumElts = VecTy->getNumElements();
- unsigned SubVecNumElts = SubVecTy->getNumElements();
- unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
-
- // An insert that entirely overwrites Vec with SubVec is a nop.
- if (VecNumElts == SubVecNumElts)
- return replaceInstUsesWith(CI, SubVec);
-
- // Widen SubVec into a vector of the same width as Vec, since
- // shufflevector requires the two input vectors to be the same width.
- // Elements beyond the bounds of SubVec within the widened vector are
- // undefined.
- SmallVector<int, 8> WidenMask;
- unsigned i;
- for (i = 0; i != SubVecNumElts; ++i)
- WidenMask.push_back(i);
- for (; i != VecNumElts; ++i)
- WidenMask.push_back(PoisonMaskElem);
-
- Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
-
- SmallVector<int, 8> Mask;
- for (unsigned i = 0; i != IdxN; ++i)
- Mask.push_back(i);
- for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
- Mask.push_back(i);
- for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
- Mask.push_back(i);
-
- Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
- return replaceInstUsesWith(CI, Shuffle);
- }
- break;
- }
case Intrinsic::vector_extract: {
Value *Vec = II->getArgOperand(0);
Value *Idx = II->getArgOperand(1);
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 19e82099e87f0..dbbc6c5a07ec8 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -112,6 +112,7 @@ class VectorCombine {
bool foldExtractExtract(Instruction &I);
bool foldInsExtFNeg(Instruction &I);
bool foldInsExtBinop(Instruction &I);
+ bool foldVectorInsertToShuffle(Instruction &I);
bool foldInsExtVectorToShuffle(Instruction &I);
bool foldBitOpOfBitcasts(Instruction &I);
bool foldBitcastShuffle(Instruction &I);
@@ -804,6 +805,73 @@ bool VectorCombine::foldInsExtBinop(Instruction &I) {
return true;
}
+/// Canonicalize fixed-width vector_insert intrinsics into shufflevectors.
+bool VectorCombine::foldVectorInsertToShuffle(Instruction &I) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ // This optimization only applies to vector_insert intrinsics.
+ if (!II || II->getIntrinsicID() != Intrinsic::vector_insert)
+ return false;
+
+ Value *Vec = II->getArgOperand(0);
+ Value *SubVec = II->getArgOperand(1);
+ Value *Idx = II->getArgOperand(2);
+
+ // Caller guarantees DstTy is a fixed vector.
+ auto *DstTy = cast<FixedVectorType>(II->getType());
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
+
+ // Only canonicalize if Vec and SubVec are both fixed vectors.
+ if (!VecTy || !SubVecTy)
+ return false;
+
+ unsigned DstNumElts = DstTy->getNumElements();
+ unsigned VecNumElts = VecTy->getNumElements();
+ unsigned SubVecNumElts = SubVecTy->getNumElements();
+  auto *IdxC = dyn_cast<ConstantInt>(Idx);
+  if (!IdxC)
+    return false;
+
+  unsigned SubVecIdx = IdxC->getZExtValue();
+
+  // Idx must be a multiple of SubVec's length and the insert must fit in Dst.
+ if (SubVecIdx % SubVecNumElts != 0 || SubVecIdx + SubVecNumElts > DstNumElts)
+ return false;
+
+ // An insert that entirely overwrites Vec with SubVec is a nop.
+ if (VecNumElts == SubVecNumElts) {
+ replaceValue(I, *SubVec);
+ return true;
+ }
+
+ // Widen SubVec into a vector of the same width as Vec, since
+ // shufflevector requires the two input vectors to be the same width.
+  // Elements beyond the bounds of SubVec within the widened vector are
+  // poison.
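+  // E.g. widening a <2 x i32> SubVec against an <8 x i32> Vec produces the
+  // mask <0, 1, poison, poison, poison, poison, poison, poison>.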
+ SmallVector<int, 8> WidenMask;
+  unsigned i = 0;
+  for (; i != SubVecNumElts; ++i)
+ WidenMask.push_back(i);
+ for (; i != VecNumElts; ++i)
+ WidenMask.push_back(PoisonMaskElem);
+
+ auto *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
+ Worklist.pushValue(WidenShuffle);
+
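+  // Build the final mask: the leading and trailing lanes come from Vec
+  // (operand 0), and the SubVecNumElts lanes starting at SubVecIdx come from
+  // the widened SubVec (operand 1, i.e. mask indices >= DstNumElts).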
+  SmallVector<int, 8> Mask;
+  for (i = 0; i != SubVecIdx; ++i)
+    Mask.push_back(i);
+  for (unsigned j = 0; j != SubVecNumElts; ++j)
+    Mask.push_back(DstNumElts + j);
+  for (i = SubVecIdx + SubVecNumElts; i != DstNumElts; ++i)
+    Mask.push_back(i);
+
+ auto *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
+ replaceValue(I, *Shuffle);
+ return true;
+}
+
bool VectorCombine::foldBitOpOfBitcasts(Instruction &I) {
// Match: bitop(bitcast(x), bitcast(y)) -> bitcast(bitop(x, y))
Value *LHSSrc, *RHSSrc;
@@ -3639,6 +3707,9 @@ bool VectorCombine::run() {
// dispatching to folding functions if there's no chance of matching.
if (IsFixedVectorType) {
switch (Opcode) {
+ case Instruction::Call:
+ MadeChange |= foldVectorInsertToShuffle(I);
+ break;
case Instruction::InsertElement:
MadeChange |= vectorizeLoadInsert(I);
break;
diff --git a/llvm/test/Transforms/VectorCombine/fold-vector-insert.ll b/llvm/test/Transforms/VectorCombine/fold-vector-insert.ll
new file mode 100644
index 0000000000000..976fdb322005b
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/fold-vector-insert.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=vector-combine -S | FileCheck %s
+
+declare <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)
+declare <8 x i32> @llvm.vector.insert.v8i32.v8i32(<8 x i32>, <8 x i32>, i64)
+declare <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32>, <2 x i32>, i64)
+declare <8 x i32> @llvm.vector.insert.v8i32.v1i32(<8 x i32>, <1 x i32>, i64)
+declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32>, <2 x i32>, i64)
+
+define <8 x i32> @vector_insert_begin(<8 x i32> %vec, <4 x i32> %subvec) {
+; CHECK-LABEL: define <8 x i32> @vector_insert_begin(
+; CHECK-SAME: <8 x i32> [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i32> [[RESULT]]
+;
+ %result = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0)
+ ret <8 x i32> %result
+}
+
+define <8 x i32> @vector_insert_middle(<8 x i32> %vec, <2 x i32> %subvec) {
+; CHECK-LABEL: define <8 x i32> @vector_insert_middle(
+; CHECK-SAME: <8 x i32> [[VEC:%.*]], <2 x i32> [[SUBVEC:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i32> [[RESULT]]
+;
+ %result = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2)
+ ret <8 x i32> %result
+}
+
+define <8 x i32> @vector_insert_end(<8 x i32> %vec, <4 x i32> %subvec) {
+; CHECK-LABEL: define <8 x i32> @vector_insert_end(
+; CHECK-SAME: <8 x i32> [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: ret <8 x i32> [[RESULT]]
+;
+ %result = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4)
+ ret <8 x i32> %result
+}
+
+define <8 x i32> @vector_insert_overwrite(<8 x i32> %vec, <8 x i32> %subvec) {
+; CHECK-LABEL: define <8 x i32> @vector_insert_overwrite(
+; CHECK-SAME: <8 x i32> [[VEC:%.*]], <8 x i32> [[SUBVEC:%.*]]) {
+; CHECK-NEXT: ret <8 x i32> [[SUBVEC]]
+;
+ %result = call <8 x i32> @llvm.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 0)
+ ret <8 x i32> %result
+}
+
+define <8 x i32> @vector_insert_single_element_at_end(<8 x i32> %vec, <1 x i32> %subvec) {
+; CHECK-LABEL: define <8 x i32> @vector_insert_single_element_at_end(
+; CHECK-SAME: <8 x i32> [[VEC:%.*]], <1 x i32> [[SUBVEC:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i32> [[SUBVEC]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
+; CHECK-NEXT: ret <8 x i32> [[RESULT]]
+;
+ %result = call <8 x i32> @llvm.vector.insert.v8i32.v1i32(<8 x i32> %vec, <1 x i32> %subvec, i64 7)
+ ret <8 x i32> %result
+}
+
+define <vscale x 4 x i32> @vector_insert_no_fold_scalable(<vscale x 4 x i32> %vec, <2 x i32> %subvec) {
+; CHECK-LABEL: define <vscale x 4 x i32> @vector_insert_no_fold_scalable(
+; CHECK-SAME: <vscale x 4 x i32> [[VEC:%.*]], <2 x i32> [[SUBVEC:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32> [[VEC]], <2 x i32> [[SUBVEC]], i64 0)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[RESULT]]
+;
+ %result = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32> %vec, <2 x i32> %subvec, i64 0)
+ ret <vscale x 4 x i32> %result
+}
From fc894c8d658554dbb04e5a99c3c8f1aeef986220 Mon Sep 17 00:00:00 2001
From: Lauren <lchin at berkeley.edu>
Date: Tue, 1 Jul 2025 04:52:10 -0400
Subject: [PATCH 2/2] [VectorCombine] Move canonicalize-vector-insert tests
from InstCombine to VectorCombine
---
.../canonicalize-vector-insert.ll | 19 +++--
.../VectorCombine/fold-vector-insert.ll | 71 -------------------
2 files changed, 15 insertions(+), 75 deletions(-)
rename llvm/test/Transforms/{InstCombine => VectorCombine}/canonicalize-vector-insert.ll (84%)
delete mode 100644 llvm/test/Transforms/VectorCombine/fold-vector-insert.ll
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll b/llvm/test/Transforms/VectorCombine/canonicalize-vector-insert.ll
similarity index 84%
rename from llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
rename to llvm/test/Transforms/VectorCombine/canonicalize-vector-insert.ll
index ab7a50e55db0f..af6fe52c07920 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/canonicalize-vector-insert.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -passes=vector-combine -S | FileCheck %s
; llvm.vector.insert canonicalizes to shufflevector in the fixed case. In the
; scalable case, we lower to the INSERT_SUBVECTOR ISD node.
@@ -31,7 +31,7 @@ define <8 x i32> @trivial_nop(<8 x i32> %vec, <8 x i32> %subvec) {
define <8 x i32> @valid_insertion_a(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_a(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 0)
@@ -71,7 +71,7 @@ define <8 x i32> @valid_insertion_d(<8 x i32> %vec, <2 x i32> %subvec) {
define <8 x i32> @valid_insertion_e(<8 x i32> %vec, <4 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_e(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0)
@@ -91,7 +91,7 @@ define <8 x i32> @valid_insertion_f(<8 x i32> %vec, <4 x i32> %subvec) {
define <8 x i32> @valid_insertion_g(<8 x i32> %vec, <3 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_g(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 0)
@@ -108,6 +108,17 @@ define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) {
ret <8 x i32> %1
}
+; Tests insertion at an interior (non-zero) index.
+define <8 x i32> @valid_insertion_i(<8 x i32> %vec, <2 x i32> %subvec) {
+; CHECK-LABEL: @valid_insertion_i(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i32> [[RESULT]]
+;
+ %result = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2)
+ ret <8 x i32> %result
+}
+
; ============================================================================ ;
; Scalable cases
; ============================================================================ ;
diff --git a/llvm/test/Transforms/VectorCombine/fold-vector-insert.ll b/llvm/test/Transforms/VectorCombine/fold-vector-insert.ll
deleted file mode 100644
index 976fdb322005b..0000000000000
--- a/llvm/test/Transforms/VectorCombine/fold-vector-insert.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -passes=vector-combine -S | FileCheck %s
-
-declare <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)
-declare <8 x i32> @llvm.vector.insert.v8i32.v8i32(<8 x i32>, <8 x i32>, i64)
-declare <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32>, <2 x i32>, i64)
-declare <8 x i32> @llvm.vector.insert.v8i32.v1i32(<8 x i32>, <1 x i32>, i64)
-declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32>, <2 x i32>, i64)
-
-define <8 x i32> @vector_insert_begin(<8 x i32> %vec, <4 x i32> %subvec) {
-; CHECK-LABEL: define <8 x i32> @vector_insert_begin(
-; CHECK-SAME: <8 x i32> [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <8 x i32> [[RESULT]]
-;
- %result = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0)
- ret <8 x i32> %result
-}
-
-define <8 x i32> @vector_insert_middle(<8 x i32> %vec, <2 x i32> %subvec) {
-; CHECK-LABEL: define <8 x i32> @vector_insert_middle(
-; CHECK-SAME: <8 x i32> [[VEC:%.*]], <2 x i32> [[SUBVEC:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <8 x i32> [[RESULT]]
-;
- %result = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2)
- ret <8 x i32> %result
-}
-
-define <8 x i32> @vector_insert_end(<8 x i32> %vec, <4 x i32> %subvec) {
-; CHECK-LABEL: define <8 x i32> @vector_insert_end(
-; CHECK-SAME: <8 x i32> [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT: ret <8 x i32> [[RESULT]]
-;
- %result = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4)
- ret <8 x i32> %result
-}
-
-define <8 x i32> @vector_insert_overwrite(<8 x i32> %vec, <8 x i32> %subvec) {
-; CHECK-LABEL: define <8 x i32> @vector_insert_overwrite(
-; CHECK-SAME: <8 x i32> [[VEC:%.*]], <8 x i32> [[SUBVEC:%.*]]) {
-; CHECK-NEXT: ret <8 x i32> [[SUBVEC]]
-;
- %result = call <8 x i32> @llvm.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 0)
- ret <8 x i32> %result
-}
-
-define <8 x i32> @vector_insert_single_element_at_end(<8 x i32> %vec, <1 x i32> %subvec) {
-; CHECK-LABEL: define <8 x i32> @vector_insert_single_element_at_end(
-; CHECK-SAME: <8 x i32> [[VEC:%.*]], <1 x i32> [[SUBVEC:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i32> [[SUBVEC]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
-; CHECK-NEXT: ret <8 x i32> [[RESULT]]
-;
- %result = call <8 x i32> @llvm.vector.insert.v8i32.v1i32(<8 x i32> %vec, <1 x i32> %subvec, i64 7)
- ret <8 x i32> %result
-}
-
-define <vscale x 4 x i32> @vector_insert_no_fold_scalable(<vscale x 4 x i32> %vec, <2 x i32> %subvec) {
-; CHECK-LABEL: define <vscale x 4 x i32> @vector_insert_no_fold_scalable(
-; CHECK-SAME: <vscale x 4 x i32> [[VEC:%.*]], <2 x i32> [[SUBVEC:%.*]]) {
-; CHECK-NEXT: [[RESULT:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32> [[VEC]], <2 x i32> [[SUBVEC]], i64 0)
-; CHECK-NEXT: ret <vscale x 4 x i32> [[RESULT]]
-;
- %result = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32> %vec, <2 x i32> %subvec, i64 0)
- ret <vscale x 4 x i32> %result
-}