[llvm] [SLP]Keep externally used GEPs as GEPs, if possible instead of extractelement. (PR #88877)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 16 04:20:28 PDT 2024
https://github.com/alexey-bataev created https://github.com/llvm/llvm-project/pull/88877
If the vectorized GEP instruction can be still kept as a scalar GEP,
better to keep it as scalar instead of extractelement. In many cases it
is more profitable.
Metric: size..text
Program size..text
results results0 diff
test-suite :: SingleSource/Benchmarks/Misc/oourafft.test 18911.00 19695.00 4.1%
test-suite :: SingleSource/Benchmarks/Misc-C++-EH/spirit.test 59987.00 60707.00 1.2%
test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test 1392209.00 1392753.00 0.0%
test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test 1392209.00 1392753.00 0.0%
test-suite :: External/SPEC/CINT2006/400.perlbench/400.perlbench.test 1087996.00 1088236.00 0.0%
test-suite :: MultiSource/Benchmarks/Bullet/bullet.test 309310.00 309342.00 0.0%
test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 664661.00 664693.00 0.0%
test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 664661.00 664693.00 0.0%
test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 12354636.00 12354908.00 0.0%
test-suite :: External/SPEC/CFP2006/453.povray/453.povray.test 1152748.00 1152716.00 -0.0%
test-suite :: MultiSource/Applications/oggenc/oggenc.test 191787.00 191771.00 -0.0%
test-suite :: SingleSource/UnitTests/matrix-types-spec.test 480796.00 480476.00 -0.1%
Misc/oourafft - Extra code gets vectorized
Misc-C++-EH/spirit - same
CFP2017speed/638.imagick_s
CFP2017rate/538.imagick_r - same, extra code gets vectorized
CINT2006/400.perlbench - some extra 4 x ptr stores vectorized
Bullet/bullet - extra 4 x ptr store vectorized
CINT2017rate/525.x264_r
CINT2017speed/625.x264_s - same
CFP2017rate/526.blender_r - extra 8 x float stores (several), some extra
4 x ptr stores
CFP2006/453.povray - 2 x double loads/stores replaced by 4 x double
loads/stores
Applications/oggenc - extra code is vectorized
UnitTests/matrix-types-spec - extra code gets vectorized
>From ab8767efdcae901a5223d64b3741f3bb91d6e4b0 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Tue, 16 Apr 2024 11:20:18 +0000
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
=?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 50 ++++++++++++++++++-
.../SLPVectorizer/X86/extract_in_tree_user.ll | 4 +-
.../SLPVectorizer/X86/geps-non-pow-2.ll | 17 ++++---
.../SLPVectorizer/X86/opaque-ptr.ll | 19 +++----
.../X86/reorder-reused-masked-gather2.ll | 2 +-
.../SLPVectorizer/X86/stacksave-dependence.ll | 4 +-
6 files changed, 71 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c63b500f546f3b..c75fbb26f1362a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1134,6 +1134,7 @@ class BoUpSLP {
MustGather.clear();
EntryToLastInstruction.clear();
ExternalUses.clear();
+ ExternalUsesAsGEPs.clear();
for (auto &Iter : BlocksSchedules) {
BlockScheduling *BS = Iter.second.get();
BS->clear();
@@ -3154,6 +3155,10 @@ class BoUpSLP {
/// after vectorization.
UserList ExternalUses;
+ /// A list of GEPs which can be reaplced by scalar GEPs instead of
+ /// extractelement instructions.
+ SmallPtrSet<Value *, 4> ExternalUsesAsGEPs;
+
/// Values used only by @llvm.assume calls.
SmallPtrSet<const Value *, 32> EphValues;
@@ -5541,6 +5546,7 @@ void BoUpSLP::buildExternalUses(
<< FoundLane << " from " << *Scalar << ".\n");
ScalarToExtUses.try_emplace(Scalar, ExternalUses.size());
ExternalUses.emplace_back(Scalar, nullptr, FoundLane);
+ continue;
}
for (User *U : Scalar->users()) {
LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
@@ -9925,6 +9931,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
SmallVector<APInt> DemandedElts;
SmallDenseSet<Value *, 4> UsedInserts;
DenseSet<std::pair<const TreeEntry *, Type *>> VectorCasts;
+ std::optional<DenseMap<Value *, unsigned>> ValueToExtUses;
for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
@@ -10033,12 +10040,40 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
}
}
}
+ // Leave the GEPs as is, they are free in most cases and better to keep them
+ // as GEPs.
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(EU.Scalar)) {
+ if (!ValueToExtUses) {
+ ValueToExtUses.emplace();
+ for_each(enumerate(ExternalUses), [&](const auto &P) {
+ ValueToExtUses->try_emplace(P.value().Scalar, P.index());
+ });
+ }
+ // Can use original GEP, if no operands vectorized or they are marked as
+ // externally used already.
+ bool CanBeUsedAsGEP = all_of(GEP->operands(), [&](Value *V) {
+ if (!getTreeEntry(V))
+ return true;
+ auto It = ValueToExtUses->find(V);
+ if (It != ValueToExtUses->end()) {
+ // Replace all uses to avoid compiler crash.
+ ExternalUses[It->second].User = nullptr;
+ return true;
+ }
+ return false;
+ });
+ if (CanBeUsedAsGEP) {
+ ExtractCost += TTI->getInstructionCost(GEP, CostKind);
+ ExternalUsesAsGEPs.insert(EU.Scalar);
+ continue;
+ }
+ }
// If we plan to rewrite the tree in a smaller type, we will need to sign
// extend the extracted value back to the original type. Here, we account
// for the extract and the added cost of the sign extend if needed.
auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
auto It = MinBWs.find(getTreeEntry(EU.Scalar));
if (It != MinBWs.end()) {
auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
@@ -13161,6 +13196,8 @@ Value *BoUpSLP::vectorizeTree(
if (Scalar->getType() != Vec->getType()) {
Value *Ex = nullptr;
Value *ExV = nullptr;
+ auto *GEP = dyn_cast<GetElementPtrInst>(Scalar);
+ bool ReplaceGEP = GEP && ExternalUsesAsGEPs.contains(GEP);
auto It = ScalarToEEs.find(Scalar);
if (It != ScalarToEEs.end()) {
// No need to emit many extracts, just move the only one in the
@@ -13186,6 +13223,15 @@ Value *BoUpSLP::vectorizeTree(
if (const TreeEntry *ETE = getTreeEntry(V))
V = ETE->VectorizedValue;
Ex = Builder.CreateExtractElement(V, ES->getIndexOperand());
+ } else if (ReplaceGEP) {
+ // Leave the GEPs as is, they are free in most cases and better to
+ // keep them as GEPs.
+ auto *CloneGEP = GEP->clone();
+ CloneGEP->insertBefore(*Builder.GetInsertBlock(),
+ Builder.GetInsertPoint());
+ if (GEP->hasName())
+ CloneGEP->takeName(GEP);
+ Ex = CloneGEP;
} else {
Ex = Builder.CreateExtractElement(Vec, Lane);
}
@@ -13224,6 +13270,8 @@ Value *BoUpSLP::vectorizeTree(
assert((ExternallyUsedValues.count(Scalar) ||
any_of(Scalar->users(),
[&](llvm::User *U) {
+ if (ExternalUsesAsGEPs.contains(U))
+ return true;
TreeEntry *UseEntry = getTreeEntry(U);
return UseEntry &&
(UseEntry->State == TreeEntry::Vectorize ||
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
index 096f57d100a50f..c600d75ed1e8c4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
@@ -13,7 +13,7 @@ define i32 @fn1() {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 11, i64 56>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 11
; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64>
; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[TMP4]], align 8
; CHECK-NEXT: ret i32 undef
@@ -92,7 +92,7 @@ define void @externally_used_ptrs() {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 56, i64 11>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 11
; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64>
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP5]], [[TMP6]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
index aa679743583064..e459cd8c6955b0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
@@ -13,25 +13,26 @@ define dso_local i32 @g() local_unnamed_addr {
; CHECK: while.body:
; CHECK-NEXT: [[C_022:%.*]] = phi ptr [ [[C_022_BE:%.*]], [[WHILE_BODY_BACKEDGE:%.*]] ], [ undef, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x ptr> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ]
-; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[C_022]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP9]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 1, i64 1>
; CHECK-NEXT: switch i32 [[TMP3]], label [[WHILE_BODY_BACKEDGE]] [
-; CHECK-NEXT: i32 2, label [[SW_BB:%.*]]
-; CHECK-NEXT: i32 4, label [[SW_BB6:%.*]]
+; CHECK-NEXT: i32 2, label [[SW_BB:%.*]]
+; CHECK-NEXT: i32 4, label [[SW_BB6:%.*]]
; CHECK-NEXT: ]
; CHECK: sw.bb:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2>
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP4]], i32 1
-; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP9]], align 4
; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 2
+; CHECK-NEXT: store i32 [[TMP7]], ptr [[INCDEC_PTR1]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2>
; CHECK-NEXT: br label [[WHILE_BODY_BACKEDGE]]
; CHECK: sw.bb6:
; CHECK-NEXT: [[INCDEC_PTR8:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 2
+; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 1
; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[INCDEC_PTR]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2>
@@ -39,7 +40,7 @@ define dso_local i32 @g() local_unnamed_addr {
; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP13]], align 4
; CHECK-NEXT: br label [[WHILE_BODY_BACKEDGE]]
; CHECK: while.body.backedge:
-; CHECK-NEXT: [[C_022_BE]] = phi ptr [ [[INCDEC_PTR]], [[WHILE_BODY]] ], [ [[INCDEC_PTR8]], [[SW_BB6]] ], [ [[INCDEC_PTR5]], [[SW_BB]] ]
+; CHECK-NEXT: [[C_022_BE]] = phi ptr [ [[INCDEC_PTR1]], [[WHILE_BODY]] ], [ [[INCDEC_PTR8]], [[SW_BB6]] ], [ [[INCDEC_PTR5]], [[SW_BB]] ]
; CHECK-NEXT: [[TMP14]] = phi <2 x ptr> [ [[TMP4]], [[WHILE_BODY]] ], [ [[TMP12]], [[SW_BB6]] ], [ [[TMP8]], [[SW_BB]] ]
; CHECK-NEXT: br label [[WHILE_BODY]]
; CHECK: while.end:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
index 3801fa5c787b6d..c40be9690cce1d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
@@ -52,17 +52,14 @@ define void @test(ptr %r, ptr %p, ptr %q) #0 {
define void @test2(ptr %a, ptr %b) {
; CHECK-LABEL: @test2(
-; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 1
-; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 2
-; CHECK-NEXT: [[I1:%.*]] = ptrtoint ptr [[A1]] to i64
-; CHECK-NEXT: [[B3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 3
-; CHECK-NEXT: [[I2:%.*]] = ptrtoint ptr [[B3]] to i64
-; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A1]], align 8
-; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8
-; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[I1]], [[V1]]
-; CHECK-NEXT: [[ADD2:%.*]] = add i64 [[I2]], [[V2]]
-; CHECK-NEXT: store i64 [[ADD1]], ptr [[A1]], align 8
-; CHECK-NEXT: store i64 [[ADD2]], ptr [[A2]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[B:%.*]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 1, i64 3>
+; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[A1]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP4]], [[TMP5]]
+; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr [[A1]], align 8
; CHECK-NEXT: ret void
;
%a1 = getelementptr inbounds i64, ptr %a, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll
index ddc2a1b819041f..30f328293cdaa3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll
@@ -9,7 +9,7 @@ define void @"foo"(ptr addrspace(1) %0, ptr addrspace(1) %1) #0 {
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr addrspace(1)> poison, ptr addrspace(1) [[TMP0:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr addrspace(1)> [[TMP3]], <4 x ptr addrspace(1)> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <4 x ptr addrspace(1)> [[TMP4]], <4 x i64> <i64 8, i64 12, i64 28, i64 24>
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr addrspace(1)> [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 8
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 8
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p1(<4 x ptr addrspace(1)> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> poison)
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <8 x i32> <i32 0, i32 3, i32 0, i32 3, i32 2, i32 1, i32 2, i32 1>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
index 0125e5fab089b2..e93c5244dfbe2c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
@@ -35,7 +35,7 @@ define void @allocas(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8
; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT: ret void
@@ -127,7 +127,7 @@ define void @stacksave2(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8
; CHECK-NEXT: call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]])
More information about the llvm-commits
mailing list