[llvm] [InstCombine] Extend folding of aggregate construction to cases when source aggregates are partially available (PR #100828)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 16:25:37 PDT 2024
https://github.com/weiguozhi updated https://github.com/llvm/llvm-project/pull/100828
>From fc0de437b517a21e72349f72e7feca1fdc08053e Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Fri, 26 Jul 2024 15:26:21 -0700
Subject: [PATCH 1/4] [InstCombine] Extend folding of aggregate construction to
cases when source aggregates are partially available
Function foldAggregateConstructionIntoAggregateReuse can fold
insertvalue(phi(extractvalue(src1), extractvalue(src2)))
into
phi(src1, src2)
when we can find source aggregates in all predecessors.
This patch extends it to handle following case
insertvalue(phi(extractvalue(src1), elm2))
into
phi(src1, insertvalue(elm2))
with the condition that the predecessor without source aggregate has only one
successor.
---
.../InstCombine/InstCombineVectorOps.cpp | 33 ++-
.../fold-aggregate-reconstruction.ll | 215 ++++++++++++++++++
.../merging-multiple-stores-into-successor.ll | 7 +-
.../phi-aware-aggregate-reconstruction.ll | 26 +--
4 files changed, 260 insertions(+), 21 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 753ed55523c843..5377bf6f033d00 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1106,6 +1106,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
// from which all the elements were originally extracted from?
// Note that we want for the map to have stable iteration order!
SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
+ bool FoundSrcAgg = false;
for (BasicBlock *Pred : Preds) {
std::pair<decltype(SourceAggregates)::iterator, bool> IV =
SourceAggregates.insert({Pred, nullptr});
@@ -1117,9 +1118,35 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
// aggregate produced by OrigIVI must have been originally extracted from
// the same aggregate. Is that so? Can we find said original aggregate?
SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
- if (Describe(SourceAggregate) != AggregateDescription::Found)
- return nullptr; // Give up.
- IV.first->second = *SourceAggregate;
+ if (Describe(SourceAggregate) == AggregateDescription::Found) {
+ FoundSrcAgg = true;
+ IV.first->second = *SourceAggregate;
+ } else {
+ // If UseBB is the single successor of Pred, we can add InsertValue to
+ // Pred.
+ if (succ_size(Pred) != 1)
+ return nullptr; // Give up.
+ }
+ }
+
+ if (!FoundSrcAgg)
+ return nullptr;
+
+ // For predecessors without appropriate source aggregate, create one in the
+ // predecessor.
+ for (auto &It : SourceAggregates) {
+ if (Describe(It.second) == AggregateDescription::Found)
+ continue;
+
+ BasicBlock *Pred = It.first;
+ Builder.SetInsertPoint(Pred, Pred->getTerminator()->getIterator());
+ Value *V = PoisonValue::get(AggTy);
+ for (auto I : enumerate(AggElts)) {
+ Value *Elt = (*I.value())->DoPHITranslation(UseBB, Pred);
+ V = Builder.CreateInsertValue(V, Elt, I.index());
+ }
+
+ It.second = V;
}
// All good! Now we just need to thread the source aggregates here.
diff --git a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
new file mode 100644
index 00000000000000..a75e9f1580ab96
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
@@ -0,0 +1,215 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+declare {ptr, i64} @bar(i64)
+
+; Basic test.
+define {ptr, i64} @test1(i1 %cond1, ptr %p1, ptr %p2) {
+; CHECK-LABEL: define { ptr, i64 } @test1(
+; CHECK-SAME: i1 [[COND1:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT: br i1 [[COND1]], label %[[BBB1:.*]], label %[[BBB2:.*]]
+; CHECK: [[BBB1]]:
+; CHECK-NEXT: [[CALL1:%.*]] = call { ptr, i64 } @bar(i64 0)
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[BBB2]]:
+; CHECK-NEXT: [[VAL21:%.*]] = load ptr, ptr [[P1]], align 8
+; CHECK-NEXT: [[VAL22:%.*]] = load i64, ptr [[P2]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr, i64 } poison, ptr [[VAL21]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr, i64 } [[TMP1]], i64 [[VAL22]], 1
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RES_MERGED:%.*]] = phi { ptr, i64 } [ [[CALL1]], %[[BBB1]] ], [ [[TMP2]], %[[BBB2]] ]
+; CHECK-NEXT: ret { ptr, i64 } [[RES_MERGED]]
+;
+ br i1 %cond1, label %bbb1, label %bbb2
+
+bbb1:
+ %call1 = call {ptr, i64} @bar(i64 0)
+ %val11 = extractvalue { ptr, i64 } %call1, 0
+ %val12 = extractvalue { ptr, i64 } %call1, 1
+ br label %exit
+
+bbb2:
+ %val21 = load ptr, ptr %p1
+ %val22 = load i64, ptr %p2
+ br label %exit
+
+exit:
+ %val1 = phi ptr [%val11, %bbb1], [%val21, %bbb2]
+ %val2 = phi i64 [%val12, %bbb1], [%val22, %bbb2]
+ %tmp = insertvalue { ptr, i64 } poison, ptr %val1, 0
+ %res = insertvalue { ptr, i64 } %tmp, i64 %val2, 1
+ ret {ptr, i64} %res
+}
+
+; Test with more predecessors.
+define {ptr, i64} @test2(i1 %cond1, i1 %cond2, ptr %p1, ptr %p2) {
+; CHECK-LABEL: define { ptr, i64 } @test2(
+; CHECK-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT: br i1 [[COND1]], label %[[BBB1:.*]], label %[[BBB4:.*]]
+; CHECK: [[BBB1]]:
+; CHECK-NEXT: br i1 [[COND2]], label %[[BBB2:.*]], label %[[BBB3:.*]]
+; CHECK: [[BBB2]]:
+; CHECK-NEXT: [[CALL1:%.*]] = call { ptr, i64 } @bar(i64 0)
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[BBB3]]:
+; CHECK-NEXT: [[CALL2:%.*]] = call { ptr, i64 } @bar(i64 1)
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[BBB4]]:
+; CHECK-NEXT: [[VAL31:%.*]] = load ptr, ptr [[P1]], align 8
+; CHECK-NEXT: [[VAL32:%.*]] = load i64, ptr [[P2]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr, i64 } poison, ptr [[VAL31]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr, i64 } [[TMP1]], i64 [[VAL32]], 1
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RES_MERGED:%.*]] = phi { ptr, i64 } [ [[CALL1]], %[[BBB2]] ], [ [[CALL2]], %[[BBB3]] ], [ [[TMP2]], %[[BBB4]] ]
+; CHECK-NEXT: ret { ptr, i64 } [[RES_MERGED]]
+;
+ br i1 %cond1, label %bbb1, label %bbb4
+
+bbb1:
+ br i1 %cond2, label %bbb2, label %bbb3
+
+bbb2:
+ %call1 = call {ptr, i64} @bar(i64 0)
+ %val11 = extractvalue { ptr, i64 } %call1, 0
+ %val12 = extractvalue { ptr, i64 } %call1, 1
+ br label %exit
+
+bbb3:
+ %call2 = call {ptr, i64} @bar(i64 1)
+ %val21 = extractvalue { ptr, i64 } %call2, 0
+ %val22 = extractvalue { ptr, i64 } %call2, 1
+ br label %exit
+
+bbb4:
+ %val31 = load ptr, ptr %p1
+ %val32 = load i64, ptr %p2
+ br label %exit
+
+exit:
+ %val1 = phi ptr [%val11, %bbb2], [%val21, %bbb3], [%val31, %bbb4]
+ %val2 = phi i64 [%val12, %bbb2], [%val22, %bbb3], [%val32, %bbb4]
+ %tmp = insertvalue { ptr, i64 } poison, ptr %val1, 0
+ %res = insertvalue { ptr, i64 } %tmp, i64 %val2, 1
+ ret {ptr, i64} %res
+}
+
+; Test with multiple PHI instructions.
+define {ptr, i64} @test3(i1 %cond1, i1 %cond2, ptr %val31, i64 %val32) {
+; CHECK-LABEL: define { ptr, i64 } @test3(
+; CHECK-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], ptr [[VAL31:%.*]], i64 [[VAL32:%.*]]) {
+; CHECK-NEXT: br i1 [[COND1]], label %[[BBB1:.*]], label %[[BBB4:.*]]
+; CHECK: [[BBB1]]:
+; CHECK-NEXT: br i1 [[COND2]], label %[[BBB2:.*]], label %[[BBB3:.*]]
+; CHECK: [[BBB2]]:
+; CHECK-NEXT: [[CALL1:%.*]] = call { ptr, i64 } @bar(i64 0)
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[BBB3]]:
+; CHECK-NEXT: [[CALL2:%.*]] = call { ptr, i64 } @bar(i64 1)
+; CHECK-NEXT: br label %[[BBB5:.*]]
+; CHECK: [[BBB4]]:
+; CHECK-NEXT: [[CALL3:%.*]] = call { ptr, i64 } @bar(i64 2)
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr, i64 } poison, ptr [[VAL31]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr, i64 } [[TMP1]], i64 [[VAL32]], 1
+; CHECK-NEXT: br label %[[BBB5]]
+; CHECK: [[BBB5]]:
+; CHECK-NEXT: [[DOTMERGED:%.*]] = phi { ptr, i64 } [ [[CALL2]], %[[BBB3]] ], [ [[TMP2]], %[[BBB4]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RES_MERGED:%.*]] = phi { ptr, i64 } [ [[CALL1]], %[[BBB2]] ], [ [[DOTMERGED]], %[[BBB5]] ]
+; CHECK-NEXT: ret { ptr, i64 } [[RES_MERGED]]
+;
+ br i1 %cond1, label %bbb1, label %bbb4
+
+bbb1:
+ br i1 %cond2, label %bbb2, label %bbb3
+
+bbb2:
+ %call1 = call {ptr, i64} @bar(i64 0)
+ %val11 = extractvalue { ptr, i64 } %call1, 0
+ %val12 = extractvalue { ptr, i64 } %call1, 1
+ br label %exit
+
+bbb3:
+ %call2 = call {ptr, i64} @bar(i64 1)
+ %val21 = extractvalue { ptr, i64 } %call2, 0
+ %val22 = extractvalue { ptr, i64 } %call2, 1
+ br label %bbb5
+
+bbb4:
+ %call3 = call {ptr, i64} @bar(i64 2)
+ br label %bbb5
+
+bbb5:
+ %val41 = phi ptr [%val21, %bbb3], [%val31, %bbb4]
+ %val42 = phi i64 [%val22, %bbb3], [%val32, %bbb4]
+ br label %exit
+
+exit:
+ %val1 = phi ptr [%val11, %bbb2], [%val41, %bbb5]
+ %val2 = phi i64 [%val12, %bbb2], [%val42, %bbb5]
+ %tmp = insertvalue { ptr, i64 } poison, ptr %val1, 0
+ %res = insertvalue { ptr, i64 } %tmp, i64 %val2, 1
+ ret {ptr, i64} %res
+}
+
+; Negative test, bbb4 has multiple successors, so we don't add insertvalue to it.
+define {ptr, i64} @test4(i1 %cond1, i1 %cond2, ptr %p1, ptr %p2) {
+; CHECK-LABEL: define { ptr, i64 } @test4(
+; CHECK-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT: br i1 [[COND1]], label %[[BBB1:.*]], label %[[BBB4:.*]]
+; CHECK: [[BBB1]]:
+; CHECK-NEXT: br i1 [[COND2]], label %[[BBB2:.*]], label %[[BBB3:.*]]
+; CHECK: [[BBB2]]:
+; CHECK-NEXT: [[CALL1:%.*]] = call { ptr, i64 } @bar(i64 0)
+; CHECK-NEXT: [[VAL11:%.*]] = extractvalue { ptr, i64 } [[CALL1]], 0
+; CHECK-NEXT: [[VAL12:%.*]] = extractvalue { ptr, i64 } [[CALL1]], 1
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[BBB3]]:
+; CHECK-NEXT: [[CALL2:%.*]] = call { ptr, i64 } @bar(i64 1)
+; CHECK-NEXT: [[VAL21:%.*]] = extractvalue { ptr, i64 } [[CALL2]], 0
+; CHECK-NEXT: [[VAL22:%.*]] = extractvalue { ptr, i64 } [[CALL2]], 1
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[BBB4]]:
+; CHECK-NEXT: [[VAL31:%.*]] = load ptr, ptr [[P1]], align 8
+; CHECK-NEXT: [[VAL32:%.*]] = load i64, ptr [[P2]], align 4
+; CHECK-NEXT: [[COND3_NOT:%.*]] = icmp eq i64 [[VAL32]], 0
+; CHECK-NEXT: br i1 [[COND3_NOT]], label %[[EXIT]], label %[[BBB4]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[VAL1:%.*]] = phi ptr [ [[VAL11]], %[[BBB2]] ], [ [[VAL21]], %[[BBB3]] ], [ [[VAL31]], %[[BBB4]] ]
+; CHECK-NEXT: [[VAL2:%.*]] = phi i64 [ [[VAL12]], %[[BBB2]] ], [ [[VAL22]], %[[BBB3]] ], [ [[VAL32]], %[[BBB4]] ]
+; CHECK-NEXT: [[TMP:%.*]] = insertvalue { ptr, i64 } poison, ptr [[VAL1]], 0
+; CHECK-NEXT: [[RES:%.*]] = insertvalue { ptr, i64 } [[TMP]], i64 [[VAL2]], 1
+; CHECK-NEXT: ret { ptr, i64 } [[RES]]
+;
+ br i1 %cond1, label %bbb1, label %bbb4
+
+bbb1:
+ br i1 %cond2, label %bbb2, label %bbb3
+
+bbb2:
+ %call1 = call {ptr, i64} @bar(i64 0)
+ %val11 = extractvalue { ptr, i64 } %call1, 0
+ %val12 = extractvalue { ptr, i64 } %call1, 1
+ br label %exit
+
+bbb3:
+ %call2 = call {ptr, i64} @bar(i64 1)
+ %val21 = extractvalue { ptr, i64 } %call2, 0
+ %val22 = extractvalue { ptr, i64 } %call2, 1
+ br label %exit
+
+bbb4:
+ %val31 = load ptr, ptr %p1
+ %val32 = load i64, ptr %p2
+ %cond3 = icmp ne i64 %val32, 0
+ br i1 %cond3, label %bbb4, label %exit
+
+exit:
+ %val1 = phi ptr [%val11, %bbb2], [%val21, %bbb3], [%val31, %bbb4]
+ %val2 = phi i64 [%val12, %bbb2], [%val22, %bbb3], [%val32, %bbb4]
+ %tmp = insertvalue { ptr, i64 } poison, ptr %val1, 0
+ %res = insertvalue { ptr, i64 } %tmp, i64 %val2, 1
+ ret {ptr, i64} %res
+}
diff --git a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
index fbf58d47a32d20..41e3197e5a2f0b 100644
--- a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
+++ b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
@@ -166,14 +166,13 @@ define %struct.half @one_elem_struct_merge(i1 %cond, %struct.half %a, half %b) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
; CHECK: BB0:
-; CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_HALF:%.*]] [[A:%.*]], 0
; CHECK-NEXT: br label [[SINK:%.*]]
; CHECK: BB1:
+; CHECK-NEXT: [[TMP0:%.*]] = insertvalue [[STRUCT_HALF:%.*]] poison, half [[B:%.*]], 0
; CHECK-NEXT: br label [[SINK]]
; CHECK: sink:
-; CHECK-NEXT: [[STOREMERGE:%.*]] = phi half [ [[TMP0]], [[BB0]] ], [ [[B:%.*]], [[BB1]] ]
-; CHECK-NEXT: [[VAL1:%.*]] = insertvalue [[STRUCT_HALF]] poison, half [[STOREMERGE]], 0
-; CHECK-NEXT: ret [[STRUCT_HALF]] [[VAL1]]
+; CHECK-NEXT: [[VAL1_MERGED:%.*]] = phi [[STRUCT_HALF]] [ [[A:%.*]], [[BB0]] ], [ [[TMP0]], [[BB1]] ]
+; CHECK-NEXT: ret [[STRUCT_HALF]] [[VAL1_MERGED]]
;
entry:
%alloca = alloca i64
diff --git a/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll
index 6b7ac445839d25..706ce4e02f2311 100644
--- a/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll
+++ b/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll
@@ -97,28 +97,26 @@ end:
ret { i32, i32 } %i8
}
-; When coming from %left, elements are swapped
-define { i32, i32 } @negative_test2({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 %c) {
-; CHECK-LABEL: @negative_test2(
+; When coming from %left, elements are swapped, and new insertvalue is created.
+; From right side the old aggregate can be directly reused.
+define { i32, i32 } @positive_test2({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 %c) {
+; CHECK-LABEL: @positive_test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C:%.*]], label [[LEFT:%.*]], label [[RIGHT:%.*]]
; CHECK: left:
; CHECK-NEXT: [[I2:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT:%.*]], 1
; CHECK-NEXT: [[I0:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT]], 0
; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[TMP0:%.*]] = insertvalue { i32, i32 } poison, i32 [[I2]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i32 } [[TMP0]], i32 [[I0]], 1
; CHECK-NEXT: br label [[END:%.*]]
; CHECK: right:
-; CHECK-NEXT: [[I4:%.*]] = extractvalue { i32, i32 } [[AGG_RIGHT:%.*]], 1
-; CHECK-NEXT: [[I3:%.*]] = extractvalue { i32, i32 } [[AGG_RIGHT]], 0
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[I5:%.*]] = phi i32 [ [[I2]], [[LEFT]] ], [ [[I3]], [[RIGHT]] ]
-; CHECK-NEXT: [[I6:%.*]] = phi i32 [ [[I0]], [[LEFT]] ], [ [[I4]], [[RIGHT]] ]
+; CHECK-NEXT: [[I8_MERGED:%.*]] = phi { i32, i32 } [ [[TMP1]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ]
; CHECK-NEXT: call void @baz()
-; CHECK-NEXT: [[I7:%.*]] = insertvalue { i32, i32 } undef, i32 [[I5]], 0
-; CHECK-NEXT: [[I8:%.*]] = insertvalue { i32, i32 } [[I7]], i32 [[I6]], 1
-; CHECK-NEXT: ret { i32, i32 } [[I8]]
+; CHECK-NEXT: ret { i32, i32 } [[I8_MERGED]]
;
entry:
%i0 = extractvalue { i32, i32 } %agg_left, 0
@@ -417,14 +415,14 @@ define { i32, i32 } @test8({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 %
; CHECK: left:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: switch i32 [[VAL_LEFT:%.*]], label [[IMPOSSIBLE:%.*]] [
-; CHECK-NEXT: i32 -42, label [[END:%.*]]
-; CHECK-NEXT: i32 42, label [[END]]
+; CHECK-NEXT: i32 -42, label [[END:%.*]]
+; CHECK-NEXT: i32 42, label [[END]]
; CHECK-NEXT: ]
; CHECK: right:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: switch i32 [[VAL_RIGHT:%.*]], label [[IMPOSSIBLE]] [
-; CHECK-NEXT: i32 42, label [[END]]
-; CHECK-NEXT: i32 -42, label [[END]]
+; CHECK-NEXT: i32 42, label [[END]]
+; CHECK-NEXT: i32 -42, label [[END]]
; CHECK-NEXT: ]
; CHECK: impossible:
; CHECK-NEXT: unreachable
>From 11dba855dea44775edd7d3392de0e1e5b796e2c6 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Mon, 29 Jul 2024 15:24:52 -0700
Subject: [PATCH 2/4] If a value is defined in UseBB, we can't use it in
PredBB, so we can't add an insertvalue to PredBB.
---
.../InstCombine/InstCombineVectorOps.cpp | 11 +++++-
.../fold-aggregate-reconstruction.ll | 37 +++++++++++++++++++
2 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 5377bf6f033d00..d48654263b3b20 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -963,6 +963,9 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
return AggregateDescription::Found;
};
+ // If an aggregate element is defined in UseBB, we can't use it in PredBB.
+ bool EltDefinedInUseBB = false;
+
// Given the value \p Elt that was being inserted into element \p EltIdx of an
// aggregate AggTy, see if \p Elt was originally defined by an
// appropriate extractvalue (same element index, same aggregate type).
@@ -972,8 +975,11 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
[&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
// For now(?), only deal with, at most, a single level of PHI indirection.
- if (UseBB && PredBB)
+ if (UseBB && PredBB) {
Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
+ if (Elt && Elt->getParent() == *UseBB)
+ EltDefinedInUseBB = true;
+ }
// FIXME: deal with multiple levels of PHI indirection?
// Did we find an extraction?
@@ -1138,6 +1144,9 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
if (Describe(It.second) == AggregateDescription::Found)
continue;
+ if (EltDefinedInUseBB)
+ return nullptr;
+
BasicBlock *Pred = It.first;
Builder.SetInsertPoint(Pred, Pred->getTerminator()->getIterator());
Value *V = PoisonValue::get(AggTy);
diff --git a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
index a75e9f1580ab96..a06f3006268a80 100644
--- a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
+++ b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
@@ -213,3 +213,40 @@ exit:
%res = insertvalue { ptr, i64 } %tmp, i64 %val2, 1
ret {ptr, i64} %res
}
+
+; Negative test, %.elt2 is defined in bb %5, it can't be accessed from %3,
+; so we can't add insertvalue to %3.
+define { ptr, i64 } @test5({ ptr, i64 } %0, ptr %1, i1 %.not) {
+; CHECK-LABEL: define { ptr, i64 } @test5(
+; CHECK-SAME: { ptr, i64 } [[TMP0:%.*]], ptr [[TMP1:%.*]], i1 [[DOTNOT:%.*]]) {
+; CHECK-NEXT: br i1 [[DOTNOT]], label %[[BB3:.*]], label %[[BB4:.*]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: store ptr null, ptr [[TMP1]], align 8
+; CHECK-NEXT: br label %[[BB5:.*]]
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[DOTELT1:%.*]] = extractvalue { ptr, i64 } [[TMP0]], 0
+; CHECK-NEXT: br label %[[BB5]]
+; CHECK: [[BB5]]:
+; CHECK-NEXT: [[TMP6:%.*]] = phi ptr [ [[DOTELT1]], %[[BB4]] ], [ null, %[[BB3]] ]
+; CHECK-NEXT: [[DOTELT2:%.*]] = extractvalue { ptr, i64 } [[TMP0]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i64 } zeroinitializer, ptr [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { ptr, i64 } [[TMP7]], i64 [[DOTELT2]], 1
+; CHECK-NEXT: ret { ptr, i64 } [[TMP8]]
+;
+ br i1 %.not, label %3, label %4
+
+3: ; preds = %2
+ store ptr null, ptr %1, align 8
+ br label %5
+
+4: ; preds = %2
+ %.elt1 = extractvalue { ptr, i64 } %0, 0
+ br label %5
+
+5: ; preds = %4, %3
+ %6 = phi ptr [ %.elt1, %4 ], [ null, %3 ]
+ %.elt2 = extractvalue { ptr, i64 } %0, 1
+ %7 = insertvalue { ptr, i64 } zeroinitializer, ptr %6, 0
+ %8 = insertvalue { ptr, i64 } %7, i64 %.elt2, 1
+ ret { ptr, i64 } %8
+}
>From a2fee0f44b602b3321f22a6f8d740f515562467f Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Tue, 30 Jul 2024 16:17:12 -0700
Subject: [PATCH 3/4] Prevent add insertvalue to inner loops.
---
.../InstCombine/InstCombineVectorOps.cpp | 29 ++-
.../fold-aggregate-reconstruction.ll | 190 +++++++++++++++---
2 files changed, 186 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index d48654263b3b20..dafa3948ae8939 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -1138,15 +1139,39 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
if (!FoundSrcAgg)
return nullptr;
- // For predecessors without appropriate source aggregate, create one in the
- // predecessor.
+ // Do some sanity check if we need to add insertvalue into predecessors.
+ Loop *OrigLoop, *UseLoop;
+ if (LI) {
+ OrigLoop = LI->getLoopFor(OrigIVI.getParent());
+ UseLoop = LI->getLoopFor(UseBB);
+ }
for (auto &It : SourceAggregates) {
if (Describe(It.second) == AggregateDescription::Found)
continue;
+ // Element is defined in UseBB, so it can't be used in predecessors.
if (EltDefinedInUseBB)
return nullptr;
+ if (LI) {
+ // Don't add insertvalue into inner loops.
+ Loop *PredLoop = LI->getLoopFor(It.first);
+ if (OrigLoop != PredLoop && (!OrigLoop || OrigLoop->contains(PredLoop)))
+ return nullptr;
+
+ // Don't cross loop header.
+ if (UseLoop && UseLoop->getHeader() == UseBB &&
+ UseLoop->contains(It.first))
+ return nullptr;
+ }
+ }
+
+ // For predecessors without appropriate source aggregate, create one in the
+ // predecessor.
+ for (auto &It : SourceAggregates) {
+ if (Describe(It.second) == AggregateDescription::Found)
+ continue;
+
BasicBlock *Pred = It.first;
Builder.SetInsertPoint(Pred, Pred->getTerminator()->getIterator());
Value *V = PoisonValue::get(AggTy);
diff --git a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
index a06f3006268a80..9753b4d234fb8b 100644
--- a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
+++ b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; RUN: opt -S -passes='instcombine<use-loop-info>' < %s | FileCheck %s
declare {ptr, i64} @bar(i64)
@@ -214,39 +214,167 @@ exit:
ret {ptr, i64} %res
}
-; Negative test, %.elt2 is defined in bb %5, it can't be accessed from %3,
-; so we can't add insertvalue to %3.
-define { ptr, i64 } @test5({ ptr, i64 } %0, ptr %1, i1 %.not) {
+; Negative test, %.elt2 is defined in bb %end, it can't be accessed from %then,
+; so we can't add insertvalue to %then.
+define { ptr, i64 } @test5({ ptr, i64 } %src, ptr %pointer, i1 %cond) {
; CHECK-LABEL: define { ptr, i64 } @test5(
-; CHECK-SAME: { ptr, i64 } [[TMP0:%.*]], ptr [[TMP1:%.*]], i1 [[DOTNOT:%.*]]) {
-; CHECK-NEXT: br i1 [[DOTNOT]], label %[[BB3:.*]], label %[[BB4:.*]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: store ptr null, ptr [[TMP1]], align 8
-; CHECK-NEXT: br label %[[BB5:.*]]
-; CHECK: [[BB4]]:
-; CHECK-NEXT: [[DOTELT1:%.*]] = extractvalue { ptr, i64 } [[TMP0]], 0
-; CHECK-NEXT: br label %[[BB5]]
-; CHECK: [[BB5]]:
-; CHECK-NEXT: [[TMP6:%.*]] = phi ptr [ [[DOTELT1]], %[[BB4]] ], [ null, %[[BB3]] ]
-; CHECK-NEXT: [[DOTELT2:%.*]] = extractvalue { ptr, i64 } [[TMP0]], 1
+; CHECK-SAME: { ptr, i64 } [[SRC:%.*]], ptr [[POINTER:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: store ptr null, ptr [[POINTER]], align 8
+; CHECK-NEXT: br label %[[END:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[DOTELT1:%.*]] = extractvalue { ptr, i64 } [[SRC]], 0
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: [[TMP6:%.*]] = phi ptr [ [[DOTELT1]], %[[ELSE]] ], [ null, %[[THEN]] ]
+; CHECK-NEXT: [[DOTELT2:%.*]] = extractvalue { ptr, i64 } [[SRC]], 1
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i64 } zeroinitializer, ptr [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { ptr, i64 } [[TMP7]], i64 [[DOTELT2]], 1
; CHECK-NEXT: ret { ptr, i64 } [[TMP8]]
;
- br i1 %.not, label %3, label %4
-
-3: ; preds = %2
- store ptr null, ptr %1, align 8
- br label %5
-
-4: ; preds = %2
- %.elt1 = extractvalue { ptr, i64 } %0, 0
- br label %5
-
-5: ; preds = %4, %3
- %6 = phi ptr [ %.elt1, %4 ], [ null, %3 ]
- %.elt2 = extractvalue { ptr, i64 } %0, 1
- %7 = insertvalue { ptr, i64 } zeroinitializer, ptr %6, 0
- %8 = insertvalue { ptr, i64 } %7, i64 %.elt2, 1
- ret { ptr, i64 } %8
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ store ptr null, ptr %pointer, align 8
+ br label %end
+
+else:
+ %.elt1 = extractvalue { ptr, i64 } %src, 0
+ br label %end
+
+end:
+ %elt = phi ptr [ %.elt1, %else ], [ null, %then ]
+ %.elt2 = extractvalue { ptr, i64 } %src, 1
+ %agg1 = insertvalue { ptr, i64 } zeroinitializer, ptr %elt, 0
+ %res = insertvalue { ptr, i64 } %agg1, i64 %.elt2, 1
+ ret { ptr, i64 } %res
+}
+
+; Negative test, we should not add insertvalue to inner loops.
+define { i64, ptr } @test6({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr } %agg3) {
+; CHECK-LABEL: define { i64, ptr } @test6(
+; CHECK-SAME: { i64, ptr } [[AGG1:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], { i64, ptr } [[AGG3:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[F10:%.*]] = extractvalue { i64, ptr } [[AGG1]], 0
+; CHECK-NEXT: [[F11:%.*]] = extractvalue { i64, ptr } [[AGG1]], 1
+; CHECK-NEXT: br label %[[HEADER:.*]]
+; CHECK: [[HEADER]]:
+; CHECK-NEXT: [[DOTSROA_01_0:%.*]] = phi i64 [ [[F10]], %[[ENTRY]] ], [ [[DOTSROA_01_1:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT: [[DOTSROA_3_0:%.*]] = phi ptr [ [[F11]], %[[ENTRY]] ], [ [[DOTSROA_3_1:%.*]], %[[LATCH]] ]
+; CHECK-NEXT: br i1 [[COND1]], label %[[CHECK:.*]], label %[[EXIT:.*]]
+; CHECK: [[CHECK]]:
+; CHECK-NEXT: br i1 [[COND2]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[F30:%.*]] = extractvalue { i64, ptr } [[AGG3]], 0
+; CHECK-NEXT: [[F31:%.*]] = extractvalue { i64, ptr } [[AGG3]], 1
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[LATCH]]:
+; CHECK-NEXT: [[DOTSROA_01_1]] = phi i64 [ [[F30]], %[[THEN]] ], [ [[DOTSROA_01_0]], %[[ELSE]] ]
+; CHECK-NEXT: [[DOTSROA_3_1]] = phi ptr [ [[F31]], %[[THEN]] ], [ [[DOTSROA_3_0]], %[[ELSE]] ]
+; CHECK-NEXT: br label %[[HEADER]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { i64, ptr } zeroinitializer, i64 [[DOTSROA_01_0]], 0
+; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { i64, ptr } [[DOTFCA_0_INSERT]], ptr [[DOTSROA_3_0]], 1
+; CHECK-NEXT: ret { i64, ptr } [[DOTFCA_1_INSERT]]
+;
+entry:
+ %f10 = extractvalue { i64, ptr } %agg1, 0
+ %f11 = extractvalue { i64, ptr } %agg1, 1
+ br label %header
+
+header:
+ %.sroa.01.0 = phi i64 [ %f10, %entry ], [ %.sroa.01.1, %latch ]
+ %.sroa.3.0 = phi ptr [ %f11, %entry ], [ %.sroa.3.1, %latch ]
+ br i1 %cond1, label %check, label %exit
+
+check:
+ br i1 %cond2, label %then, label %else
+
+then:
+ %f30 = extractvalue { i64, ptr } %agg3, 0
+ %f31 = extractvalue { i64, ptr } %agg3, 1
+ br label %latch
+
+else:
+ br label %latch
+
+latch:
+ %.sroa.01.1 = phi i64 [ %f30, %then ], [ %.sroa.01.0, %else ]
+ %.sroa.3.1 = phi ptr [ %f31, %then ], [ %.sroa.3.0, %else ]
+ br label %header
+
+exit:
+ %.fca.0.insert = insertvalue { i64, ptr } zeroinitializer, i64 %.sroa.01.0, 0
+ %.fca.1.insert = insertvalue { i64, ptr } %.fca.0.insert, ptr %.sroa.3.0, 1
+ ret { i64, ptr } %.fca.1.insert
+}
+
+; Negative test, we should not add insertvalue cross loop header.
+define { i64, ptr } @test7({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr } %agg3) {
+; CHECK-LABEL: define { i64, ptr } @test7(
+; CHECK-SAME: { i64, ptr } [[AGG1:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], { i64, ptr } [[AGG3:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[F10:%.*]] = extractvalue { i64, ptr } [[AGG1]], 0
+; CHECK-NEXT: [[F11:%.*]] = extractvalue { i64, ptr } [[AGG1]], 1
+; CHECK-NEXT: br label %[[HEADER:.*]]
+; CHECK: [[HEADER]]:
+; CHECK-NEXT: [[DOTSROA_01_0:%.*]] = phi i64 [ [[F10]], %[[ENTRY]] ], [ [[DOTSROA_01_1:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT: [[DOTSROA_3_0:%.*]] = phi ptr [ [[F11]], %[[ENTRY]] ], [ [[DOTSROA_3_1:%.*]], %[[LATCH]] ]
+; CHECK-NEXT: [[FCA:%.*]] = phi { i64, ptr } [ [[AGG1]], %[[ENTRY]] ], [ [[F2_MERGED:%.*]], %[[LATCH]] ]
+; CHECK-NEXT: br i1 [[COND1]], label %[[CHECK:.*]], label %[[EXIT:.*]]
+; CHECK: [[CHECK]]:
+; CHECK-NEXT: br i1 [[COND2]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[F30:%.*]] = extractvalue { i64, ptr } [[AGG3]], 0
+; CHECK-NEXT: [[F31:%.*]] = extractvalue { i64, ptr } [[AGG3]], 1
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[RIGHT0:%.*]] = insertvalue { i64, ptr } poison, i64 [[DOTSROA_01_0]], 0
+; CHECK-NEXT: [[RIGHT1:%.*]] = insertvalue { i64, ptr } [[RIGHT0]], ptr [[DOTSROA_3_0]], 1
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[LATCH]]:
+; CHECK-NEXT: [[DOTSROA_01_1]] = phi i64 [ [[F30]], %[[THEN]] ], [ [[DOTSROA_01_0]], %[[ELSE]] ]
+; CHECK-NEXT: [[DOTSROA_3_1]] = phi ptr [ [[F31]], %[[THEN]] ], [ [[DOTSROA_3_0]], %[[ELSE]] ]
+; CHECK-NEXT: [[F2_MERGED]] = phi { i64, ptr } [ [[AGG3]], %[[THEN]] ], [ [[RIGHT1]], %[[ELSE]] ]
+; CHECK-NEXT: br label %[[HEADER]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret { i64, ptr } [[FCA]]
+;
+entry:
+ %f10 = extractvalue { i64, ptr } %agg1, 0
+ %f11 = extractvalue { i64, ptr } %agg1, 1
+ br label %header
+
+header:
+ %.sroa.01.0 = phi i64 [ %f10, %entry ], [ %.sroa.01.1, %latch ]
+ %.sroa.3.0 = phi ptr [ %f11, %entry ], [ %.sroa.3.1, %latch ]
+ %fca = phi { i64, ptr } [ %agg1, %entry ], [ %f2.merged, %latch ]
+ br i1 %cond1, label %check, label %exit
+
+check:
+ br i1 %cond2, label %then, label %else
+
+then:
+ %f30 = extractvalue { i64, ptr } %agg3, 0
+ %f31 = extractvalue { i64, ptr } %agg3, 1
+ br label %latch
+
+else:
+ %right0 = insertvalue { i64, ptr } poison, i64 %.sroa.01.0, 0
+ %right1 = insertvalue { i64, ptr } %right0, ptr %.sroa.3.0, 1
+ br label %latch
+
+latch:
+ %.sroa.01.1 = phi i64 [ %f30, %then ], [ %.sroa.01.0, %else ]
+ %.sroa.3.1 = phi ptr [ %f31, %then ], [ %.sroa.3.0, %else ]
+ %f2.merged = phi { i64, ptr } [ %agg3, %then ], [ %right1, %else ]
+ br label %header
+
+exit:
+ ret { i64, ptr } %fca
}
>From d3d61cda897c27b5e8e1e0d2857950cf6c50af32 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Thu, 22 Aug 2024 16:24:16 -0700
Subject: [PATCH 4/4] Use control flow and profile information to avoid
infinite loop.
---
.../InstCombine/InstCombineVectorOps.cpp | 30 +++++-----
.../fold-aggregate-reconstruction.ll | 58 +++++++++++++------
2 files changed, 54 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index dafa3948ae8939..696560bd708040 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -19,8 +19,8 @@
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -1140,11 +1140,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
return nullptr;
// Do some sanity check if we need to add insertvalue into predecessors.
- Loop *OrigLoop, *UseLoop;
- if (LI) {
- OrigLoop = LI->getLoopFor(OrigIVI.getParent());
- UseLoop = LI->getLoopFor(UseBB);
- }
+ auto OrigBB = OrigIVI.getParent();
for (auto &It : SourceAggregates) {
if (Describe(It.second) == AggregateDescription::Found)
continue;
@@ -1153,17 +1149,19 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
if (EltDefinedInUseBB)
return nullptr;
- if (LI) {
- // Don't add insertvalue into inner loops.
- Loop *PredLoop = LI->getLoopFor(It.first);
- if (OrigLoop != PredLoop && (!OrigLoop || OrigLoop->contains(PredLoop)))
- return nullptr;
+ // Do this transformation cross loop boundary may cause dead loop. So we
+ // should avoid this situation. But LoopInfo is not generally available, we
+ // must be conservative here.
+ // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
+ // can't be in inner loop.
+ if (UseBB == OrigBB)
+ continue;
- // Don't cross loop header.
- if (UseLoop && UseLoop->getHeader() == UseBB &&
- UseLoop->contains(It.first))
- return nullptr;
- }
+ // It is always safe to move InsertValue to colder places.
+ if (BFI && BFI->getBlockFreq(OrigBB) > BFI->getBlockFreq(It.first))
+ continue;
+
+ return nullptr;
}
// For predecessors without appropriate source aggregate, create one in the
diff --git a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
index 9753b4d234fb8b..ff0e37ff2c89de 100644
--- a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
+++ b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes='instcombine<use-loop-info>' < %s | FileCheck %s
+; RUN: opt -S -passes='require<profile-summary>,instcombine' < %s | FileCheck %s
declare {ptr, i64} @bar(i64)
@@ -314,10 +314,10 @@ exit:
ret { i64, ptr } %.fca.1.insert
}
-; Negative test, we should not add insertvalue cross loop header.
-define { i64, ptr } @test7({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr } %agg3) {
+; With profile information we can add insertvalue into colder block, even if it is in inner loop.
+define { i64, ptr } @test7({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr } %agg3) !prof !52 {
; CHECK-LABEL: define { i64, ptr } @test7(
-; CHECK-SAME: { i64, ptr } [[AGG1:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], { i64, ptr } [[AGG3:%.*]]) {
+; CHECK-SAME: { i64, ptr } [[AGG1:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], { i64, ptr } [[AGG3:%.*]]) !prof [[PROF14:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[F10:%.*]] = extractvalue { i64, ptr } [[AGG1]], 0
; CHECK-NEXT: [[F11:%.*]] = extractvalue { i64, ptr } [[AGG1]], 1
@@ -325,8 +325,8 @@ define { i64, ptr } @test7({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr
; CHECK: [[HEADER]]:
; CHECK-NEXT: [[DOTSROA_01_0:%.*]] = phi i64 [ [[F10]], %[[ENTRY]] ], [ [[DOTSROA_01_1:%.*]], %[[LATCH:.*]] ]
; CHECK-NEXT: [[DOTSROA_3_0:%.*]] = phi ptr [ [[F11]], %[[ENTRY]] ], [ [[DOTSROA_3_1:%.*]], %[[LATCH]] ]
-; CHECK-NEXT: [[FCA:%.*]] = phi { i64, ptr } [ [[AGG1]], %[[ENTRY]] ], [ [[F2_MERGED:%.*]], %[[LATCH]] ]
-; CHECK-NEXT: br i1 [[COND1]], label %[[CHECK:.*]], label %[[EXIT:.*]]
+; CHECK-NEXT: [[DOTFCA_1_INSERT_MERGED:%.*]] = phi { i64, ptr } [ [[AGG1]], %[[ENTRY]] ], [ [[DOTMERGED:%.*]], %[[LATCH]] ]
+; CHECK-NEXT: br i1 [[COND1]], label %[[CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF15:![0-9]+]]
; CHECK: [[CHECK]]:
; CHECK-NEXT: br i1 [[COND2]], label %[[THEN:.*]], label %[[ELSE:.*]]
; CHECK: [[THEN]]:
@@ -334,16 +334,16 @@ define { i64, ptr } @test7({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr
; CHECK-NEXT: [[F31:%.*]] = extractvalue { i64, ptr } [[AGG3]], 1
; CHECK-NEXT: br label %[[LATCH]]
; CHECK: [[ELSE]]:
-; CHECK-NEXT: [[RIGHT0:%.*]] = insertvalue { i64, ptr } poison, i64 [[DOTSROA_01_0]], 0
-; CHECK-NEXT: [[RIGHT1:%.*]] = insertvalue { i64, ptr } [[RIGHT0]], ptr [[DOTSROA_3_0]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = insertvalue { i64, ptr } poison, i64 [[DOTSROA_01_0]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i64, ptr } [[TMP0]], ptr [[DOTSROA_3_0]], 1
; CHECK-NEXT: br label %[[LATCH]]
; CHECK: [[LATCH]]:
-; CHECK-NEXT: [[DOTSROA_01_1]] = phi i64 [ [[F30]], %[[THEN]] ], [ [[DOTSROA_01_0]], %[[ELSE]] ]
-; CHECK-NEXT: [[DOTSROA_3_1]] = phi ptr [ [[F31]], %[[THEN]] ], [ [[DOTSROA_3_0]], %[[ELSE]] ]
-; CHECK-NEXT: [[F2_MERGED]] = phi { i64, ptr } [ [[AGG3]], %[[THEN]] ], [ [[RIGHT1]], %[[ELSE]] ]
+; CHECK-NEXT: [[DOTSROA_01_1]] = phi i64 [ [[DOTSROA_01_0]], %[[ELSE]] ], [ [[F30]], %[[THEN]] ]
+; CHECK-NEXT: [[DOTSROA_3_1]] = phi ptr [ [[DOTSROA_3_0]], %[[ELSE]] ], [ [[F31]], %[[THEN]] ]
+; CHECK-NEXT: [[DOTMERGED]] = phi { i64, ptr } [ [[TMP1]], %[[ELSE]] ], [ [[AGG3]], %[[THEN]] ]
; CHECK-NEXT: br label %[[HEADER]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret { i64, ptr } [[FCA]]
+; CHECK-NEXT: ret { i64, ptr } [[DOTFCA_1_INSERT_MERGED]]
;
entry:
%f10 = extractvalue { i64, ptr } %agg1, 0
@@ -353,8 +353,7 @@ entry:
header:
%.sroa.01.0 = phi i64 [ %f10, %entry ], [ %.sroa.01.1, %latch ]
%.sroa.3.0 = phi ptr [ %f11, %entry ], [ %.sroa.3.1, %latch ]
- %fca = phi { i64, ptr } [ %agg1, %entry ], [ %f2.merged, %latch ]
- br i1 %cond1, label %check, label %exit
+ br i1 %cond1, label %check, label %exit, !prof !100
check:
br i1 %cond2, label %then, label %else
@@ -365,16 +364,39 @@ then:
br label %latch
else:
- %right0 = insertvalue { i64, ptr } poison, i64 %.sroa.01.0, 0
- %right1 = insertvalue { i64, ptr } %right0, ptr %.sroa.3.0, 1
br label %latch
latch:
%.sroa.01.1 = phi i64 [ %f30, %then ], [ %.sroa.01.0, %else ]
%.sroa.3.1 = phi ptr [ %f31, %then ], [ %.sroa.3.0, %else ]
- %f2.merged = phi { i64, ptr } [ %agg3, %then ], [ %right1, %else ]
br label %header
exit:
- ret { i64, ptr } %fca
+ %.fca.0.insert = insertvalue { i64, ptr } zeroinitializer, i64 %.sroa.01.0, 0
+ %.fca.1.insert = insertvalue { i64, ptr } %.fca.0.insert, ptr %.sroa.3.0, 1
+ ret { i64, ptr } %.fca.1.insert
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+; The threshold for hot count is 100
+
+!52 = !{!"function_entry_count", i64 1000}
+!100 = !{!"branch_weights", i32 1, i32 100}
+;.
+; CHECK: [[PROF14]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF15]] = !{!"branch_weights", i32 1, i32 100}
+;.
More information about the llvm-commits
mailing list