[llvm] [VectorCombine] Support simplification to scalar store for multiple insertelt (PR #132820)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 25 04:00:22 PDT 2025
https://github.com/ParkHanbum updated https://github.com/llvm/llvm-project/pull/132820
From 288dd9f4a23221933b0ca2cf6413593bda63739a Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Mon, 24 Mar 2025 14:47:00 +0900
Subject: [PATCH 1/2] Add test cases for an upcoming patch
---
.../VectorCombine/load-insert-store.ll | 318 ++++++++++++++++++
1 file changed, 318 insertions(+)
diff --git a/llvm/test/Transforms/VectorCombine/load-insert-store.ll b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
index 93565c1a708eb..9dcadb5ccf30a 100644
--- a/llvm/test/Transforms/VectorCombine/load-insert-store.ll
+++ b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
@@ -16,6 +16,71 @@ entry:
ret void
}
+define void @insert_store2(ptr %q, i16 zeroext %s) {
+; CHECK-LABEL: @insert_store2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 6
+; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 7
+; CHECK-NEXT: store <8 x i16> [[VEC2]], ptr [[Q]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <8 x i16>, ptr %q
+ %vec1 = insertelement <8 x i16> %0, i16 %s, i32 6
+ %vec2 = insertelement <8 x i16> %vec1, i16 %s, i32 7
+ store <8 x i16> %vec2, ptr %q, align 1
+ ret void
+}
+
+define void @insert_store3(ptr %q, i16 zeroext %s) {
+; CHECK-LABEL: @insert_store3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 5
+; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 6
+; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 [[S]], i32 7
+; CHECK-NEXT: store <8 x i16> [[VEC3]], ptr [[Q]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <8 x i16>, ptr %q
+ %vec1 = insertelement <8 x i16> %0, i16 %s, i32 5
+ %vec2 = insertelement <8 x i16> %vec1, i16 %s, i32 6
+ %vec3 = insertelement <8 x i16> %vec2, i16 %s, i32 7
+ store <8 x i16> %vec3, ptr %q, align 1
+ ret void
+}
+
+define void @insert_store8(ptr %q, i16 zeroext %s) {
+; CHECK-LABEL: @insert_store8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 0
+; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 1
+; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 [[S]], i32 2
+; CHECK-NEXT: [[VEC4:%.*]] = insertelement <8 x i16> [[VEC3]], i16 [[S]], i32 3
+; CHECK-NEXT: [[VEC5:%.*]] = insertelement <8 x i16> [[VEC4]], i16 [[S]], i32 4
+; CHECK-NEXT: [[VEC6:%.*]] = insertelement <8 x i16> [[VEC5]], i16 [[S]], i32 5
+; CHECK-NEXT: [[VEC7:%.*]] = insertelement <8 x i16> [[VEC6]], i16 [[S]], i32 6
+; CHECK-NEXT: [[VEC8:%.*]] = insertelement <8 x i16> [[VEC7]], i16 [[S]], i32 7
+; CHECK-NEXT: store <8 x i16> [[VEC8]], ptr [[Q]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <8 x i16>, ptr %q
+ %vec1 = insertelement <8 x i16> %0, i16 %s, i32 0
+ %vec2 = insertelement <8 x i16> %vec1, i16 %s, i32 1
+ %vec3 = insertelement <8 x i16> %vec2, i16 %s, i32 2
+ %vec4 = insertelement <8 x i16> %vec3, i16 %s, i32 3
+ %vec5 = insertelement <8 x i16> %vec4, i16 %s, i32 4
+ %vec6 = insertelement <8 x i16> %vec5, i16 %s, i32 5
+ %vec7 = insertelement <8 x i16> %vec6, i16 %s, i32 6
+ %vec8 = insertelement <8 x i16> %vec7, i16 %s, i32 7
+ store <8 x i16> %vec8, ptr %q, align 1
+ ret void
+}
+
define void @insert_store_i16_align1(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_i16_align1(
; CHECK-NEXT: entry:
@@ -827,3 +892,256 @@ bb:
declare i32 @bar(i32, i1) readonly
declare double @llvm.log2.f64(double)
+
+define void @insert_store_gap(ptr %q, i16 zeroext %s) {
+; CHECK-LABEL: @insert_store_gap(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 2
+; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 5
+; CHECK-NEXT: store <8 x i16> [[VEC2]], ptr [[Q]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <8 x i16>, ptr %q
+ %vec1 = insertelement <8 x i16> %0, i16 %s, i32 2
+ %vec2 = insertelement <8 x i16> %vec1, i16 %s, i32 5
+ store <8 x i16> %vec2, ptr %q
+ ret void
+}
+
+define void @insert_store_reverse(ptr %q, i16 zeroext %s) {
+; CHECK-LABEL: @insert_store_reverse(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 7
+; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 6
+; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 [[S]], i32 5
+; CHECK-NEXT: store <8 x i16> [[VEC3]], ptr [[Q]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <8 x i16>, ptr %q
+ %vec1 = insertelement <8 x i16> %0, i16 %s, i32 7
+ %vec2 = insertelement <8 x i16> %vec1, i16 %s, i32 6
+ %vec3 = insertelement <8 x i16> %vec2, i16 %s, i32 5
+ store <8 x i16> %vec3, ptr %q
+ ret void
+}
+
+define void @insert_store_duplicate(ptr %q, i16 zeroext %s) {
+; CHECK-LABEL: @insert_store_duplicate(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
+; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 3
+; CHECK-NEXT: store <8 x i16> [[VEC2]], ptr [[Q]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <8 x i16>, ptr %q
+ %vec1 = insertelement <8 x i16> %0, i16 %s, i32 3
+ %vec2 = insertelement <8 x i16> %vec1, i16 %s, i32 3
+ store <8 x i16> %vec2, ptr %q
+ ret void
+}
+
+define void @insert_store_i32(ptr %q, i32 zeroext %s) {
+; CHECK-LABEL: @insert_store_i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[Q:%.*]], align 16
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[S:%.*]], i32 2
+; CHECK-NEXT: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[S]], i32 3
+; CHECK-NEXT: store <4 x i32> [[VEC2]], ptr [[Q]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <4 x i32>, ptr %q
+ %vec1 = insertelement <4 x i32> %0, i32 %s, i32 2
+ %vec2 = insertelement <4 x i32> %vec1, i32 %s, i32 3
+ store <4 x i32> %vec2, ptr %q
+ ret void
+}
+
+define void @insert_store_i8(ptr %q, i8 zeroext %s) {
+; CHECK-LABEL: @insert_store_i8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 8
+; CHECK-NEXT: [[VEC2:%.*]] = insertelement <16 x i8> [[VEC1]], i8 [[S]], i32 9
+; CHECK-NEXT: store <16 x i8> [[VEC2]], ptr [[Q]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <16 x i8>, ptr %q
+ %vec1 = insertelement <16 x i8> %0, i8 %s, i32 8
+ %vec2 = insertelement <16 x i8> %vec1, i8 %s, i32 9
+ store <16 x i8> %vec2, ptr %q
+ ret void
+}
+
+define void @insert_store_alignment(ptr %q, i16 zeroext %s) {
+; CHECK-LABEL: @insert_store_alignment(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 0
+; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 4
+; CHECK-NEXT: store <8 x i16> [[VEC2]], ptr [[Q]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <8 x i16>, ptr %q, align 16
+ %vec1 = insertelement <8 x i16> %0, i16 %s, i32 0
+ %vec2 = insertelement <8 x i16> %vec1, i16 %s, i32 4
+ store <8 x i16> %vec2, ptr %q, align 16
+ ret void
+}
+
+define void @insert_store_size(ptr %q, i16 zeroext %s) {
+; CHECK-LABEL: @insert_store_size(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i16>, ptr [[Q:%.*]], align 32
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <16 x i16> [[TMP0]], i16 [[S:%.*]], i32 8
+; CHECK-NEXT: [[VEC2:%.*]] = insertelement <16 x i16> [[VEC1]], i16 [[S]], i32 12
+; CHECK-NEXT: store <16 x i16> [[VEC2]], ptr [[Q]], align 32
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <16 x i16>, ptr %q
+ %vec1 = insertelement <16 x i16> %0, i16 %s, i32 8
+ %vec2 = insertelement <16 x i16> %vec1, i16 %s, i32 12
+ store <16 x i16> %vec2, ptr %q
+ ret void
+}
+
+define void @insert_store_nonconst4(ptr %q, i8 zeroext %s, i32 %idx1, i32 %idx2, i32 %idx3, i32 %idx4) {
+; CHECK-LABEL: @insert_store_nonconst4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX1:%.*]]
+; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[S]], i32 [[IDX2:%.*]]
+; CHECK-NEXT: [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[S]], i32 [[IDX3:%.*]]
+; CHECK-NEXT: [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[S]], i32 [[IDX4:%.*]]
+; CHECK-NEXT: store <16 x i8> [[VECINS4]], ptr [[Q]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <16 x i8>, ptr %q
+ %vecins1 = insertelement <16 x i8> %0, i8 %s, i32 %idx1
+ %vecins2 = insertelement <16 x i8> %vecins1, i8 %s, i32 %idx2
+ %vecins3 = insertelement <16 x i8> %vecins2, i8 %s, i32 %idx3
+ %vecins4 = insertelement <16 x i8> %vecins3, i8 %s, i32 %idx4
+ store <16 x i8> %vecins4, ptr %q
+ ret void
+}
+
+define void @insert_store_vscale_nonconst2(ptr %q, i8 zeroext %s, i32 %idx1, i32 %idx2) {
+; CHECK-LABEL: @insert_store_vscale_nonconst2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
+; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX1:%.*]]
+; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <vscale x 16 x i8> [[VECINS1]], i8 [[S]], i32 [[IDX2:%.*]]
+; CHECK-NEXT: store <vscale x 16 x i8> [[VECINS2]], ptr [[Q]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load <vscale x 16 x i8>, ptr %q
+ %vecins1 = insertelement <vscale x 16 x i8> %0, i8 %s, i32 %idx1
+ %vecins2 = insertelement <vscale x 16 x i8> %vecins1, i8 %s, i32 %idx2
+ store <vscale x 16 x i8> %vecins2, ptr %q
+ ret void
+}
+
+define void @insert_store_nonconst_large_alignment2(ptr %q, i32 zeroext %s, i32 %idx1, i32 %idx2) {
+; CHECK-LABEL: @insert_store_nonconst_large_alignment2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IDX1:%.*]], 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX2:%.*]], 4
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP2]])
+; CHECK-NEXT: [[I:%.*]] = load <4 x i32>, ptr [[Q:%.*]], align 128
+; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <4 x i32> [[I]], i32 [[S:%.*]], i32 [[IDX1]]
+; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <4 x i32> [[VECINS1]], i32 [[S]], i32 [[IDX2]]
+; CHECK-NEXT: store <4 x i32> [[VECINS2]], ptr [[Q]], align 128
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp1 = icmp ult i32 %idx1, 4
+ %cmp2 = icmp ult i32 %idx2, 4
+ call void @llvm.assume(i1 %cmp1)
+ call void @llvm.assume(i1 %cmp2)
+ %i = load <4 x i32>, ptr %q, align 128
+ %vecins1 = insertelement <4 x i32> %i, i32 %s, i32 %idx1
+ %vecins2 = insertelement <4 x i32> %vecins1, i32 %s, i32 %idx2
+ store <4 x i32> %vecins2, ptr %q, align 128
+ ret void
+}
+
+define void @insert_store_nonconst_align_maximum_8_2(ptr %q, i64 %s, i32 %idx1, i32 %idx2) {
+; CHECK-LABEL: @insert_store_nonconst_align_maximum_8_2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IDX1:%.*]], 2
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX2:%.*]], 2
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP2]])
+; CHECK-NEXT: [[I:%.*]] = load <8 x i64>, ptr [[Q:%.*]], align 8
+; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <8 x i64> [[I]], i64 [[S:%.*]], i32 [[IDX1]]
+; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <8 x i64> [[VECINS1]], i64 [[S]], i32 [[IDX2]]
+; CHECK-NEXT: store <8 x i64> [[VECINS2]], ptr [[Q]], align 8
+; CHECK-NEXT: ret void
+;
+ %cmp1 = icmp ult i32 %idx1, 2
+ %cmp2 = icmp ult i32 %idx2, 2
+ call void @llvm.assume(i1 %cmp1)
+ call void @llvm.assume(i1 %cmp2)
+ %i = load <8 x i64>, ptr %q, align 8
+ %vecins1 = insertelement <8 x i64> %i, i64 %s, i32 %idx1
+ %vecins2 = insertelement <8 x i64> %vecins1, i64 %s, i32 %idx2
+ store <8 x i64> %vecins2, ptr %q, align 8
+ ret void
+}
+
+define void @insert_store_nonconst_align_maximum_4_2(ptr %q, i64 %s, i32 %idx1, i32 %idx2) {
+; CHECK-LABEL: @insert_store_nonconst_align_maximum_4_2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IDX1:%.*]], 2
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX2:%.*]], 2
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP2]])
+; CHECK-NEXT: [[I:%.*]] = load <8 x i64>, ptr [[Q:%.*]], align 4
+; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <8 x i64> [[I]], i64 [[S:%.*]], i32 [[IDX1]]
+; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <8 x i64> [[VECINS1]], i64 [[S]], i32 [[IDX2]]
+; CHECK-NEXT: store <8 x i64> [[VECINS2]], ptr [[Q]], align 4
+; CHECK-NEXT: ret void
+;
+ %cmp1 = icmp ult i32 %idx1, 2
+ %cmp2 = icmp ult i32 %idx2, 2
+ call void @llvm.assume(i1 %cmp1)
+ call void @llvm.assume(i1 %cmp2)
+ %i = load <8 x i64>, ptr %q, align 4
+ %vecins1 = insertelement <8 x i64> %i, i64 %s, i32 %idx1
+ %vecins2 = insertelement <8 x i64> %vecins1, i64 %s, i32 %idx2
+ store <8 x i64> %vecins2, ptr %q, align 4
+ ret void
+}
+
+define void @insert_store_nonconst_align_larger_2(ptr %q, i64 %s, i32 %idx1, i32 %idx2) {
+; CHECK-LABEL: @insert_store_nonconst_align_larger_2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IDX1:%.*]], 2
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX2:%.*]], 2
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP2]])
+; CHECK-NEXT: [[I:%.*]] = load <8 x i64>, ptr [[Q:%.*]], align 4
+; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <8 x i64> [[I]], i64 [[S:%.*]], i32 [[IDX1]]
+; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <8 x i64> [[VECINS1]], i64 [[S]], i32 [[IDX2]]
+; CHECK-NEXT: store <8 x i64> [[VECINS2]], ptr [[Q]], align 2
+; CHECK-NEXT: ret void
+;
+ %cmp1 = icmp ult i32 %idx1, 2
+ %cmp2 = icmp ult i32 %idx2, 2
+ call void @llvm.assume(i1 %cmp1)
+ call void @llvm.assume(i1 %cmp2)
+ %i = load <8 x i64>, ptr %q, align 4
+ %vecins1 = insertelement <8 x i64> %i, i64 %s, i32 %idx1
+ %vecins2 = insertelement <8 x i64> %vecins1, i64 %s, i32 %idx2
+ store <8 x i64> %vecins2, ptr %q, align 2
+ ret void
+}
From e589ed008dd3085221dc9d40627640cc5705c21c Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Mon, 24 Mar 2025 15:59:37 +0900
Subject: [PATCH 2/2] [VectorCombine] Support simplification to scalar store
for multiple insertelt
Previously, we only simplified a load-insertelement-store sequence into a
scalar getelementptr + store when a single insertelement was present.
This patch extends the fold to chains of multiple insertelement instructions.
Proof: https://alive2.llvm.org/ce/z/QTspTf
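As a minimal before/after sketch of the fold (value names are illustrative;
the resulting alignments follow the insert_store2 test updated below), IR of
the form

  %0 = load <8 x i16>, ptr %q
  %vec1 = insertelement <8 x i16> %0, i16 %s, i32 6
  %vec2 = insertelement <8 x i16> %vec1, i16 %s, i32 7
  store <8 x i16> %vec2, ptr %q, align 1

is now rewritten into one scalar store per inserted element:

  %gep6 = getelementptr inbounds <8 x i16>, ptr %q, i32 0, i32 6
  store i16 %s, ptr %gep6, align 4
  %gep7 = getelementptr inbounds <8 x i16>, ptr %q, i32 0, i32 7
  store i16 %s, ptr %gep7, align 2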
---
.../Transforms/Vectorize/VectorCombine.cpp | 102 ++++++++-----
.../VectorCombine/load-insert-store.ll | 136 +++++++++---------
2 files changed, 138 insertions(+), 100 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4bfe41a5ed00d..483a344d33fb7 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -115,7 +115,7 @@ class VectorCombine {
bool scalarizeVPIntrinsic(Instruction &I);
bool foldExtractedCmps(Instruction &I);
bool foldBinopOfReductions(Instruction &I);
- bool foldSingleElementStore(Instruction &I);
+ bool foldInsertElementsStore(Instruction &I);
bool scalarizeLoadExtract(Instruction &I);
bool foldConcatOfBoolMasks(Instruction &I);
bool foldPermuteOfBinops(Instruction &I);
@@ -1493,58 +1493,88 @@ static Align computeAlignmentAfterScalarization(Align VectorAlignment,
// %0 = bitcast <4 x i32>* %a to i32*
// %1 = getelementptr inbounds i32, i32* %0, i64 0, i64 1
// store i32 %b, i32* %1
-bool VectorCombine::foldSingleElementStore(Instruction &I) {
+bool VectorCombine::foldInsertElementsStore(Instruction &I) {
auto *SI = cast<StoreInst>(&I);
if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
return false;
- // TODO: Combine more complicated patterns (multiple insert) by referencing
- // TargetTransformInfo.
- Instruction *Source;
- Value *NewElement;
- Value *Idx;
- if (!match(SI->getValueOperand(),
- m_InsertElt(m_Instruction(Source), m_Value(NewElement),
- m_Value(Idx))))
- return false;
-
- if (auto *Load = dyn_cast<LoadInst>(Source)) {
- auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
- Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
- // Don't optimize for atomic/volatile load or store. Ensure memory is not
- // modified between, vector type matches store size, and index is inbounds.
- if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
- !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
- SrcAddr != SI->getPointerOperand()->stripPointerCasts())
- return false;
+ Value *Source = SI->getValueOperand();
+ // Track back multiple inserts.
+ SmallVector<std::pair<Value *, Value *>, 4> InsertElements;
+ Value *Base = Source;
+ while (auto *Insert = dyn_cast<InsertElementInst>(Base)) {
+ if (!Insert->hasOneUse())
+ break;
+ Value *InsertVal = Insert->getOperand(1);
+ Value *Idx = Insert->getOperand(2);
+ InsertElements.push_back({InsertVal, Idx});
+ Base = Insert->getOperand(0);
+ }
- auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
- if (ScalarizableIdx.isUnsafe() ||
- isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
- MemoryLocation::get(SI), AA))
- return false;
+ if (InsertElements.empty())
+ return false;
- // Ensure we add the load back to the worklist BEFORE its users so they can
- // be erased in the correct order.
- Worklist.push(Load);
+ auto *Load = dyn_cast<LoadInst>(Base);
+ if (!Load)
+ return false;
+ auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
+ Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
+ // Don't optimize for atomic/volatile load or store. Ensure memory is not
+ // modified between, vector type matches store size, and index is inbounds.
+ if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
+ !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
+ SrcAddr != SI->getPointerOperand()->stripPointerCasts())
+ return false;
+
+ if (isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
+ MemoryLocation::get(SI), AA))
+ return false;
+
+ for (size_t i = 0; i < InsertElements.size(); i++) {
+ Value *Idx = InsertElements[i].second;
+ auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
+ if (ScalarizableIdx.isUnsafe())
+ return false;
if (ScalarizableIdx.isSafeWithFreeze())
ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
+ }
+
+ // Ensure we add the load back to the worklist BEFORE its users so they can
+ // be erased in the correct order.
+ Worklist.push(Load);
+ stable_sort(InsertElements, [](const std::pair<Value *, Value *> &A,
+ const std::pair<Value *, Value *> &B) {
+ bool AIsConst = isa<ConstantInt>(A.second);
+ bool BIsConst = isa<ConstantInt>(B.second);
+ if (AIsConst != BIsConst)
+ return AIsConst;
+
+ if (AIsConst && BIsConst)
+ return cast<ConstantInt>(A.second)->getZExtValue() <
+ cast<ConstantInt>(B.second)->getZExtValue();
+ return false;
+ });
+
+ StoreInst *NSI;
+ for (size_t i = 0; i < InsertElements.size(); i++) {
+ Value *InsertVal = InsertElements[i].first;
+ Value *Idx = InsertElements[i].second;
+
Value *GEP = Builder.CreateInBoundsGEP(
SI->getValueOperand()->getType(), SI->getPointerOperand(),
{ConstantInt::get(Idx->getType(), 0), Idx});
- StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
+ NSI = Builder.CreateStore(InsertVal, GEP);
NSI->copyMetadata(*SI);
Align ScalarOpAlignment = computeAlignmentAfterScalarization(
- std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
+ std::max(SI->getAlign(), Load->getAlign()), InsertVal->getType(), Idx,
*DL);
NSI->setAlignment(ScalarOpAlignment);
- replaceValue(I, *NSI);
- eraseInstruction(I);
- return true;
}
- return false;
+ replaceValue(I, *NSI);
+ eraseInstruction(I);
+ return true;
}
/// Try to scalarize vector loads feeding extractelement instructions.
@@ -3527,7 +3557,7 @@ bool VectorCombine::run() {
}
if (Opcode == Instruction::Store)
- MadeChange |= foldSingleElementStore(I);
+ MadeChange |= foldInsertElementsStore(I);
// If this is an early pipeline invocation of this pass, we are done.
if (TryEarlyFoldsOnly)
diff --git a/llvm/test/Transforms/VectorCombine/load-insert-store.ll b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
index 9dcadb5ccf30a..33b4562844720 100644
--- a/llvm/test/Transforms/VectorCombine/load-insert-store.ll
+++ b/llvm/test/Transforms/VectorCombine/load-insert-store.ll
@@ -19,10 +19,10 @@ entry:
define void @insert_store2(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store2(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 6
-; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 7
-; CHECK-NEXT: store <8 x i16> [[VEC2]], ptr [[Q]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 6
+; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 7
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -36,11 +36,12 @@ entry:
define void @insert_store3(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store3(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 5
-; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 6
-; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 [[S]], i32 7
-; CHECK-NEXT: store <8 x i16> [[VEC3]], ptr [[Q]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 5
+; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 2
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 6
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 7
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP2]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -55,16 +56,22 @@ entry:
define void @insert_store8(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store8(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 0
-; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 1
-; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 [[S]], i32 2
-; CHECK-NEXT: [[VEC4:%.*]] = insertelement <8 x i16> [[VEC3]], i16 [[S]], i32 3
-; CHECK-NEXT: [[VEC5:%.*]] = insertelement <8 x i16> [[VEC4]], i16 [[S]], i32 4
-; CHECK-NEXT: [[VEC6:%.*]] = insertelement <8 x i16> [[VEC5]], i16 [[S]], i32 5
-; CHECK-NEXT: [[VEC7:%.*]] = insertelement <8 x i16> [[VEC6]], i16 [[S]], i32 6
-; CHECK-NEXT: [[VEC8:%.*]] = insertelement <8 x i16> [[VEC7]], i16 [[S]], i32 7
-; CHECK-NEXT: store <8 x i16> [[VEC8]], ptr [[Q]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 0
+; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 1
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 2
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 2
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 3
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP3]], align 2
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 4
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 5
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP5]], align 2
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 6
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 7
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP7]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -896,10 +903,10 @@ declare double @llvm.log2.f64(double)
define void @insert_store_gap(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_gap(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 2
-; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 5
-; CHECK-NEXT: store <8 x i16> [[VEC2]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 2
+; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 5
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -913,11 +920,12 @@ entry:
define void @insert_store_reverse(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_reverse(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 7
-; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 6
-; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 [[S]], i32 5
-; CHECK-NEXT: store <8 x i16> [[VEC3]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 5
+; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 2
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 6
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 7
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP2]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -932,10 +940,10 @@ entry:
define void @insert_store_duplicate(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_duplicate(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
-; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 3
-; CHECK-NEXT: store <8 x i16> [[VEC2]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 3
+; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 2
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 3
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -949,10 +957,10 @@ entry:
define void @insert_store_i32(ptr %q, i32 zeroext %s) {
; CHECK-LABEL: @insert_store_i32(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT: [[VEC1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[S:%.*]], i32 2
-; CHECK-NEXT: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[S]], i32 3
-; CHECK-NEXT: store <4 x i32> [[VEC2]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Q:%.*]], i32 0, i32 2
+; CHECK-NEXT: store i32 [[S:%.*]], ptr [[TMP0]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Q]], i32 0, i32 3
+; CHECK-NEXT: store i32 [[S]], ptr [[TMP1]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -966,10 +974,10 @@ entry:
define void @insert_store_i8(ptr %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store_i8(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT: [[VEC1:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 8
-; CHECK-NEXT: [[VEC2:%.*]] = insertelement <16 x i8> [[VEC1]], i8 [[S]], i32 9
-; CHECK-NEXT: store <16 x i8> [[VEC2]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q:%.*]], i32 0, i32 8
+; CHECK-NEXT: store i8 [[S:%.*]], ptr [[TMP0]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q]], i32 0, i32 9
+; CHECK-NEXT: store i8 [[S]], ptr [[TMP1]], align 1
; CHECK-NEXT: ret void
;
entry:
@@ -983,10 +991,10 @@ entry:
define void @insert_store_alignment(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_alignment(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 0
-; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[S]], i32 4
-; CHECK-NEXT: store <8 x i16> [[VEC2]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q:%.*]], i32 0, i32 0
+; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i16>, ptr [[Q]], i32 0, i32 4
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 8
; CHECK-NEXT: ret void
;
entry:
@@ -1000,10 +1008,10 @@ entry:
define void @insert_store_size(ptr %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_size(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i16>, ptr [[Q:%.*]], align 32
-; CHECK-NEXT: [[VEC1:%.*]] = insertelement <16 x i16> [[TMP0]], i16 [[S:%.*]], i32 8
-; CHECK-NEXT: [[VEC2:%.*]] = insertelement <16 x i16> [[VEC1]], i16 [[S]], i32 12
-; CHECK-NEXT: store <16 x i16> [[VEC2]], ptr [[Q]], align 32
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i16>, ptr [[Q:%.*]], i32 0, i32 8
+; CHECK-NEXT: store i16 [[S:%.*]], ptr [[TMP0]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i16>, ptr [[Q]], i32 0, i32 12
+; CHECK-NEXT: store i16 [[S]], ptr [[TMP1]], align 8
; CHECK-NEXT: ret void
;
entry:
@@ -1059,10 +1067,10 @@ define void @insert_store_nonconst_large_alignment2(ptr %q, i32 zeroext %s, i32
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX2:%.*]], 4
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP2]])
-; CHECK-NEXT: [[I:%.*]] = load <4 x i32>, ptr [[Q:%.*]], align 128
-; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <4 x i32> [[I]], i32 [[S:%.*]], i32 [[IDX1]]
-; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <4 x i32> [[VECINS1]], i32 [[S]], i32 [[IDX2]]
-; CHECK-NEXT: store <4 x i32> [[VECINS2]], ptr [[Q]], align 128
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Q:%.*]], i32 0, i32 [[IDX2]]
+; CHECK-NEXT: store i32 [[S:%.*]], ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Q]], i32 0, i32 [[IDX1]]
+; CHECK-NEXT: store i32 [[S]], ptr [[TMP1]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -1083,10 +1091,10 @@ define void @insert_store_nonconst_align_maximum_8_2(ptr %q, i64 %s, i32 %idx1,
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX2:%.*]], 2
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP2]])
-; CHECK-NEXT: [[I:%.*]] = load <8 x i64>, ptr [[Q:%.*]], align 8
-; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <8 x i64> [[I]], i64 [[S:%.*]], i32 [[IDX1]]
-; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <8 x i64> [[VECINS1]], i64 [[S]], i32 [[IDX2]]
-; CHECK-NEXT: store <8 x i64> [[VECINS2]], ptr [[Q]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q:%.*]], i32 0, i32 [[IDX2]]
+; CHECK-NEXT: store i64 [[S:%.*]], ptr [[TMP1]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q]], i32 0, i32 [[IDX1]]
+; CHECK-NEXT: store i64 [[S]], ptr [[TMP2]], align 8
; CHECK-NEXT: ret void
;
%cmp1 = icmp ult i32 %idx1, 2
@@ -1106,10 +1114,10 @@ define void @insert_store_nonconst_align_maximum_4_2(ptr %q, i64 %s, i32 %idx1,
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX2:%.*]], 2
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP2]])
-; CHECK-NEXT: [[I:%.*]] = load <8 x i64>, ptr [[Q:%.*]], align 4
-; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <8 x i64> [[I]], i64 [[S:%.*]], i32 [[IDX1]]
-; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <8 x i64> [[VECINS1]], i64 [[S]], i32 [[IDX2]]
-; CHECK-NEXT: store <8 x i64> [[VECINS2]], ptr [[Q]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q:%.*]], i32 0, i32 [[IDX2]]
+; CHECK-NEXT: store i64 [[S:%.*]], ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q]], i32 0, i32 [[IDX1]]
+; CHECK-NEXT: store i64 [[S]], ptr [[TMP2]], align 4
; CHECK-NEXT: ret void
;
%cmp1 = icmp ult i32 %idx1, 2
@@ -1129,10 +1137,10 @@ define void @insert_store_nonconst_align_larger_2(ptr %q, i64 %s, i32 %idx1, i32
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX2:%.*]], 2
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP2]])
-; CHECK-NEXT: [[I:%.*]] = load <8 x i64>, ptr [[Q:%.*]], align 4
-; CHECK-NEXT: [[VECINS1:%.*]] = insertelement <8 x i64> [[I]], i64 [[S:%.*]], i32 [[IDX1]]
-; CHECK-NEXT: [[VECINS2:%.*]] = insertelement <8 x i64> [[VECINS1]], i64 [[S]], i32 [[IDX2]]
-; CHECK-NEXT: store <8 x i64> [[VECINS2]], ptr [[Q]], align 2
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q:%.*]], i32 0, i32 [[IDX2]]
+; CHECK-NEXT: store i64 [[S:%.*]], ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i64>, ptr [[Q]], i32 0, i32 [[IDX1]]
+; CHECK-NEXT: store i64 [[S]], ptr [[TMP2]], align 4
; CHECK-NEXT: ret void
;
%cmp1 = icmp ult i32 %idx1, 2