[llvm] [InstCombine] Extend folding of aggregate construction to cases when source aggregates are partially available (PR #100828)

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 8 11:07:48 PST 2024


https://github.com/weiguozhi updated https://github.com/llvm/llvm-project/pull/100828

>From fc0de437b517a21e72349f72e7feca1fdc08053e Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Fri, 26 Jul 2024 15:26:21 -0700
Subject: [PATCH 1/8] [InstCombine] Extend folding of aggregate construction to
 cases when source aggregates are partially available

Function foldAggregateConstructionIntoAggregateReuse can fold
  insertvalue(phi(extractvalue(src1), extractvalue(src2)))
into
  phi(src1, src2)
when we can find source aggregates in all predecessors.

This patch extends it to handle following case
  insertvalue(phi(extractvalue(src1), elm2))
into
  phi(src1, insertvalue(elm2))
with the condition that the predecessor without source aggregate has only one
successor.
---
 .../InstCombine/InstCombineVectorOps.cpp      |  33 ++-
 .../fold-aggregate-reconstruction.ll          | 215 ++++++++++++++++++
 .../merging-multiple-stores-into-successor.ll |   7 +-
 .../phi-aware-aggregate-reconstruction.ll     |  26 +--
 4 files changed, 260 insertions(+), 21 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 753ed55523c843..5377bf6f033d00 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1106,6 +1106,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
   // from which all the elements were originally extracted from?
   // Note that we want for the map to have stable iteration order!
   SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
+  bool FoundSrcAgg = false;
   for (BasicBlock *Pred : Preds) {
     std::pair<decltype(SourceAggregates)::iterator, bool> IV =
         SourceAggregates.insert({Pred, nullptr});
@@ -1117,9 +1118,35 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
     // aggregate produced by OrigIVI must have been originally extracted from
     // the same aggregate. Is that so? Can we find said original aggregate?
     SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
-    if (Describe(SourceAggregate) != AggregateDescription::Found)
-      return nullptr; // Give up.
-    IV.first->second = *SourceAggregate;
+    if (Describe(SourceAggregate) == AggregateDescription::Found) {
+      FoundSrcAgg = true;
+      IV.first->second = *SourceAggregate;
+    } else {
+      // If UseBB is the single successor of Pred, we can add InsertValue to
+      // Pred.
+      if (succ_size(Pred) != 1)
+        return nullptr; // Give up.
+    }
+  }
+
+  if (!FoundSrcAgg)
+    return nullptr;
+
+  // For predecessors without appropriate source aggregate, create one in the
+  // predecessor.
+  for (auto &It : SourceAggregates) {
+    if (Describe(It.second) == AggregateDescription::Found)
+      continue;
+
+    BasicBlock *Pred = It.first;
+    Builder.SetInsertPoint(Pred, Pred->getTerminator()->getIterator());
+    Value *V = PoisonValue::get(AggTy);
+    for (auto I : enumerate(AggElts)) {
+      Value *Elt = (*I.value())->DoPHITranslation(UseBB, Pred);
+      V = Builder.CreateInsertValue(V, Elt, I.index());
+    }
+
+    It.second = V;
   }
 
   // All good! Now we just need to thread the source aggregates here.
diff --git a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
new file mode 100644
index 00000000000000..a75e9f1580ab96
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
@@ -0,0 +1,215 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+declare {ptr, i64} @bar(i64)
+
+; Basic test.
+define {ptr, i64} @test1(i1 %cond1, ptr %p1, ptr %p2) {
+; CHECK-LABEL: define { ptr, i64 } @test1(
+; CHECK-SAME: i1 [[COND1:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:    br i1 [[COND1]], label %[[BBB1:.*]], label %[[BBB2:.*]]
+; CHECK:       [[BBB1]]:
+; CHECK-NEXT:    [[CALL1:%.*]] = call { ptr, i64 } @bar(i64 0)
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BBB2]]:
+; CHECK-NEXT:    [[VAL21:%.*]] = load ptr, ptr [[P1]], align 8
+; CHECK-NEXT:    [[VAL22:%.*]] = load i64, ptr [[P2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { ptr, i64 } poison, ptr [[VAL21]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { ptr, i64 } [[TMP1]], i64 [[VAL22]], 1
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RES_MERGED:%.*]] = phi { ptr, i64 } [ [[CALL1]], %[[BBB1]] ], [ [[TMP2]], %[[BBB2]] ]
+; CHECK-NEXT:    ret { ptr, i64 } [[RES_MERGED]]
+;
+  br i1 %cond1, label %bbb1, label %bbb2
+
+bbb1:
+  %call1 = call {ptr, i64} @bar(i64 0)
+  %val11 = extractvalue { ptr, i64 } %call1, 0
+  %val12 = extractvalue { ptr, i64 } %call1, 1
+  br label %exit
+
+bbb2:
+  %val21 = load ptr, ptr %p1
+  %val22 = load i64, ptr %p2
+  br label %exit
+
+exit:
+  %val1 = phi ptr [%val11, %bbb1], [%val21, %bbb2]
+  %val2 = phi i64 [%val12, %bbb1], [%val22, %bbb2]
+  %tmp = insertvalue { ptr, i64 } poison, ptr %val1, 0
+  %res = insertvalue { ptr, i64 } %tmp, i64 %val2, 1
+  ret {ptr, i64} %res
+}
+
+; Test with more predecessors.
+define {ptr, i64} @test2(i1 %cond1, i1 %cond2, ptr %p1, ptr %p2) {
+; CHECK-LABEL: define { ptr, i64 } @test2(
+; CHECK-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:    br i1 [[COND1]], label %[[BBB1:.*]], label %[[BBB4:.*]]
+; CHECK:       [[BBB1]]:
+; CHECK-NEXT:    br i1 [[COND2]], label %[[BBB2:.*]], label %[[BBB3:.*]]
+; CHECK:       [[BBB2]]:
+; CHECK-NEXT:    [[CALL1:%.*]] = call { ptr, i64 } @bar(i64 0)
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BBB3]]:
+; CHECK-NEXT:    [[CALL2:%.*]] = call { ptr, i64 } @bar(i64 1)
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[BBB4]]:
+; CHECK-NEXT:    [[VAL31:%.*]] = load ptr, ptr [[P1]], align 8
+; CHECK-NEXT:    [[VAL32:%.*]] = load i64, ptr [[P2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { ptr, i64 } poison, ptr [[VAL31]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { ptr, i64 } [[TMP1]], i64 [[VAL32]], 1
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RES_MERGED:%.*]] = phi { ptr, i64 } [ [[CALL1]], %[[BBB2]] ], [ [[CALL2]], %[[BBB3]] ], [ [[TMP2]], %[[BBB4]] ]
+; CHECK-NEXT:    ret { ptr, i64 } [[RES_MERGED]]
+;
+  br i1 %cond1, label %bbb1, label %bbb4
+
+bbb1:
+  br i1 %cond2, label %bbb2, label %bbb3
+
+bbb2:
+  %call1 = call {ptr, i64} @bar(i64 0)
+  %val11 = extractvalue { ptr, i64 } %call1, 0
+  %val12 = extractvalue { ptr, i64 } %call1, 1
+  br label %exit
+
+bbb3:
+  %call2 = call {ptr, i64} @bar(i64 1)
+  %val21 = extractvalue { ptr, i64 } %call2, 0
+  %val22 = extractvalue { ptr, i64 } %call2, 1
+  br label %exit
+
+bbb4:
+  %val31 = load ptr, ptr %p1
+  %val32 = load i64, ptr %p2
+  br label %exit
+
+exit:
+  %val1 = phi ptr [%val11, %bbb2], [%val21, %bbb3], [%val31, %bbb4]
+  %val2 = phi i64 [%val12, %bbb2], [%val22, %bbb3], [%val32, %bbb4]
+  %tmp = insertvalue { ptr, i64 } poison, ptr %val1, 0
+  %res = insertvalue { ptr, i64 } %tmp, i64 %val2, 1
+  ret {ptr, i64} %res
+}
+
+; Test with multiple PHI instructions.
+define {ptr, i64} @test3(i1 %cond1, i1 %cond2, ptr %val31, i64 %val32) {
+; CHECK-LABEL: define { ptr, i64 } @test3(
+; CHECK-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], ptr [[VAL31:%.*]], i64 [[VAL32:%.*]]) {
+; CHECK-NEXT:    br i1 [[COND1]], label %[[BBB1:.*]], label %[[BBB4:.*]]
+; CHECK:       [[BBB1]]:
+; CHECK-NEXT:    br i1 [[COND2]], label %[[BBB2:.*]], label %[[BBB3:.*]]
+; CHECK:       [[BBB2]]:
+; CHECK-NEXT:    [[CALL1:%.*]] = call { ptr, i64 } @bar(i64 0)
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BBB3]]:
+; CHECK-NEXT:    [[CALL2:%.*]] = call { ptr, i64 } @bar(i64 1)
+; CHECK-NEXT:    br label %[[BBB5:.*]]
+; CHECK:       [[BBB4]]:
+; CHECK-NEXT:    [[CALL3:%.*]] = call { ptr, i64 } @bar(i64 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { ptr, i64 } poison, ptr [[VAL31]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { ptr, i64 } [[TMP1]], i64 [[VAL32]], 1
+; CHECK-NEXT:    br label %[[BBB5]]
+; CHECK:       [[BBB5]]:
+; CHECK-NEXT:    [[DOTMERGED:%.*]] = phi { ptr, i64 } [ [[CALL2]], %[[BBB3]] ], [ [[TMP2]], %[[BBB4]] ]
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RES_MERGED:%.*]] = phi { ptr, i64 } [ [[CALL1]], %[[BBB2]] ], [ [[DOTMERGED]], %[[BBB5]] ]
+; CHECK-NEXT:    ret { ptr, i64 } [[RES_MERGED]]
+;
+  br i1 %cond1, label %bbb1, label %bbb4
+
+bbb1:
+  br i1 %cond2, label %bbb2, label %bbb3
+
+bbb2:
+  %call1 = call {ptr, i64} @bar(i64 0)
+  %val11 = extractvalue { ptr, i64 } %call1, 0
+  %val12 = extractvalue { ptr, i64 } %call1, 1
+  br label %exit
+
+bbb3:
+  %call2 = call {ptr, i64} @bar(i64 1)
+  %val21 = extractvalue { ptr, i64 } %call2, 0
+  %val22 = extractvalue { ptr, i64 } %call2, 1
+  br label %bbb5
+
+bbb4:
+  %call3 = call {ptr, i64} @bar(i64 2)
+  br label %bbb5
+
+bbb5:
+  %val41 = phi ptr [%val21, %bbb3], [%val31, %bbb4]
+  %val42 = phi i64 [%val22, %bbb3], [%val32, %bbb4]
+  br label %exit
+
+exit:
+  %val1 = phi ptr [%val11, %bbb2], [%val41, %bbb5]
+  %val2 = phi i64 [%val12, %bbb2], [%val42, %bbb5]
+  %tmp = insertvalue { ptr, i64 } poison, ptr %val1, 0
+  %res = insertvalue { ptr, i64 } %tmp, i64 %val2, 1
+  ret {ptr, i64} %res
+}
+
+; Negative test, bbb4 has multiple successors, so we don't add insertvalue to it.
+define {ptr, i64} @test4(i1 %cond1, i1 %cond2, ptr %p1, ptr %p2) {
+; CHECK-LABEL: define { ptr, i64 } @test4(
+; CHECK-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:    br i1 [[COND1]], label %[[BBB1:.*]], label %[[BBB4:.*]]
+; CHECK:       [[BBB1]]:
+; CHECK-NEXT:    br i1 [[COND2]], label %[[BBB2:.*]], label %[[BBB3:.*]]
+; CHECK:       [[BBB2]]:
+; CHECK-NEXT:    [[CALL1:%.*]] = call { ptr, i64 } @bar(i64 0)
+; CHECK-NEXT:    [[VAL11:%.*]] = extractvalue { ptr, i64 } [[CALL1]], 0
+; CHECK-NEXT:    [[VAL12:%.*]] = extractvalue { ptr, i64 } [[CALL1]], 1
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BBB3]]:
+; CHECK-NEXT:    [[CALL2:%.*]] = call { ptr, i64 } @bar(i64 1)
+; CHECK-NEXT:    [[VAL21:%.*]] = extractvalue { ptr, i64 } [[CALL2]], 0
+; CHECK-NEXT:    [[VAL22:%.*]] = extractvalue { ptr, i64 } [[CALL2]], 1
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[BBB4]]:
+; CHECK-NEXT:    [[VAL31:%.*]] = load ptr, ptr [[P1]], align 8
+; CHECK-NEXT:    [[VAL32:%.*]] = load i64, ptr [[P2]], align 4
+; CHECK-NEXT:    [[COND3_NOT:%.*]] = icmp eq i64 [[VAL32]], 0
+; CHECK-NEXT:    br i1 [[COND3_NOT]], label %[[EXIT]], label %[[BBB4]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = phi ptr [ [[VAL11]], %[[BBB2]] ], [ [[VAL21]], %[[BBB3]] ], [ [[VAL31]], %[[BBB4]] ]
+; CHECK-NEXT:    [[VAL2:%.*]] = phi i64 [ [[VAL12]], %[[BBB2]] ], [ [[VAL22]], %[[BBB3]] ], [ [[VAL32]], %[[BBB4]] ]
+; CHECK-NEXT:    [[TMP:%.*]] = insertvalue { ptr, i64 } poison, ptr [[VAL1]], 0
+; CHECK-NEXT:    [[RES:%.*]] = insertvalue { ptr, i64 } [[TMP]], i64 [[VAL2]], 1
+; CHECK-NEXT:    ret { ptr, i64 } [[RES]]
+;
+  br i1 %cond1, label %bbb1, label %bbb4
+
+bbb1:
+  br i1 %cond2, label %bbb2, label %bbb3
+
+bbb2:
+  %call1 = call {ptr, i64} @bar(i64 0)
+  %val11 = extractvalue { ptr, i64 } %call1, 0
+  %val12 = extractvalue { ptr, i64 } %call1, 1
+  br label %exit
+
+bbb3:
+  %call2 = call {ptr, i64} @bar(i64 1)
+  %val21 = extractvalue { ptr, i64 } %call2, 0
+  %val22 = extractvalue { ptr, i64 } %call2, 1
+  br label %exit
+
+bbb4:
+  %val31 = load ptr, ptr %p1
+  %val32 = load i64, ptr %p2
+  %cond3 = icmp ne i64 %val32, 0
+  br i1 %cond3, label %bbb4, label %exit
+
+exit:
+  %val1 = phi ptr [%val11, %bbb2], [%val21, %bbb3], [%val31, %bbb4]
+  %val2 = phi i64 [%val12, %bbb2], [%val22, %bbb3], [%val32, %bbb4]
+  %tmp = insertvalue { ptr, i64 } poison, ptr %val1, 0
+  %res = insertvalue { ptr, i64 } %tmp, i64 %val2, 1
+  ret {ptr, i64} %res
+}
diff --git a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
index fbf58d47a32d20..41e3197e5a2f0b 100644
--- a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
+++ b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
@@ -166,14 +166,13 @@ define %struct.half @one_elem_struct_merge(i1 %cond, %struct.half %a, half %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
 ; CHECK:       BB0:
-; CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_HALF:%.*]] [[A:%.*]], 0
 ; CHECK-NEXT:    br label [[SINK:%.*]]
 ; CHECK:       BB1:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue [[STRUCT_HALF:%.*]] poison, half [[B:%.*]], 0
 ; CHECK-NEXT:    br label [[SINK]]
 ; CHECK:       sink:
-; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi half [ [[TMP0]], [[BB0]] ], [ [[B:%.*]], [[BB1]] ]
-; CHECK-NEXT:    [[VAL1:%.*]] = insertvalue [[STRUCT_HALF]] poison, half [[STOREMERGE]], 0
-; CHECK-NEXT:    ret [[STRUCT_HALF]] [[VAL1]]
+; CHECK-NEXT:    [[VAL1_MERGED:%.*]] = phi [[STRUCT_HALF]] [ [[A:%.*]], [[BB0]] ], [ [[TMP0]], [[BB1]] ]
+; CHECK-NEXT:    ret [[STRUCT_HALF]] [[VAL1_MERGED]]
 ;
 entry:
   %alloca = alloca i64
diff --git a/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll
index 6b7ac445839d25..706ce4e02f2311 100644
--- a/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll
+++ b/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll
@@ -97,28 +97,26 @@ end:
   ret { i32, i32 } %i8
 }
 
-; When coming from %left, elements are swapped
-define { i32, i32 } @negative_test2({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 %c) {
-; CHECK-LABEL: @negative_test2(
+; When coming from %left, elements are swapped, and new insertvalue is created.
+; From right side the old aggregate can be directly reused.
+define { i32, i32 } @positive_test2({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 %c) {
+; CHECK-LABEL: @positive_test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[LEFT:%.*]], label [[RIGHT:%.*]]
 ; CHECK:       left:
 ; CHECK-NEXT:    [[I2:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT:%.*]], 1
 ; CHECK-NEXT:    [[I0:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT]], 0
 ; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { i32, i32 } poison, i32 [[I2]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i32 } [[TMP0]], i32 [[I0]], 1
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       right:
-; CHECK-NEXT:    [[I4:%.*]] = extractvalue { i32, i32 } [[AGG_RIGHT:%.*]], 1
-; CHECK-NEXT:    [[I3:%.*]] = extractvalue { i32, i32 } [[AGG_RIGHT]], 0
 ; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[I5:%.*]] = phi i32 [ [[I2]], [[LEFT]] ], [ [[I3]], [[RIGHT]] ]
-; CHECK-NEXT:    [[I6:%.*]] = phi i32 [ [[I0]], [[LEFT]] ], [ [[I4]], [[RIGHT]] ]
+; CHECK-NEXT:    [[I8_MERGED:%.*]] = phi { i32, i32 } [ [[TMP1]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ]
 ; CHECK-NEXT:    call void @baz()
-; CHECK-NEXT:    [[I7:%.*]] = insertvalue { i32, i32 } undef, i32 [[I5]], 0
-; CHECK-NEXT:    [[I8:%.*]] = insertvalue { i32, i32 } [[I7]], i32 [[I6]], 1
-; CHECK-NEXT:    ret { i32, i32 } [[I8]]
+; CHECK-NEXT:    ret { i32, i32 } [[I8_MERGED]]
 ;
 entry:
   %i0 = extractvalue { i32, i32 } %agg_left, 0
@@ -417,14 +415,14 @@ define { i32, i32 } @test8({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 %
 ; CHECK:       left:
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    switch i32 [[VAL_LEFT:%.*]], label [[IMPOSSIBLE:%.*]] [
-; CHECK-NEXT:    i32 -42, label [[END:%.*]]
-; CHECK-NEXT:    i32 42, label [[END]]
+; CHECK-NEXT:      i32 -42, label [[END:%.*]]
+; CHECK-NEXT:      i32 42, label [[END]]
 ; CHECK-NEXT:    ]
 ; CHECK:       right:
 ; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    switch i32 [[VAL_RIGHT:%.*]], label [[IMPOSSIBLE]] [
-; CHECK-NEXT:    i32 42, label [[END]]
-; CHECK-NEXT:    i32 -42, label [[END]]
+; CHECK-NEXT:      i32 42, label [[END]]
+; CHECK-NEXT:      i32 -42, label [[END]]
 ; CHECK-NEXT:    ]
 ; CHECK:       impossible:
 ; CHECK-NEXT:    unreachable

>From 11dba855dea44775edd7d3392de0e1e5b796e2c6 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Mon, 29 Jul 2024 15:24:52 -0700
Subject: [PATCH 2/8] If a value is defined in UseBB, we can't use it in
 PredBB, so we can't add an insertvalue to PredBB.

---
 .../InstCombine/InstCombineVectorOps.cpp      | 11 +++++-
 .../fold-aggregate-reconstruction.ll          | 37 +++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 5377bf6f033d00..d48654263b3b20 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -963,6 +963,9 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
     return AggregateDescription::Found;
   };
 
+  // If an aggregate element is defined in UseBB, we can't use it in PredBB.
+  bool EltDefinedInUseBB = false;
+
   // Given the value \p Elt that was being inserted into element \p EltIdx of an
   // aggregate AggTy, see if \p Elt was originally defined by an
   // appropriate extractvalue (same element index, same aggregate type).
@@ -972,8 +975,11 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
       [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
           std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
     // For now(?), only deal with, at most, a single level of PHI indirection.
-    if (UseBB && PredBB)
+    if (UseBB && PredBB) {
       Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
+      if (Elt && Elt->getParent() == *UseBB)
+        EltDefinedInUseBB = true;
+    }
     // FIXME: deal with multiple levels of PHI indirection?
 
     // Did we find an extraction?
@@ -1138,6 +1144,9 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
     if (Describe(It.second) == AggregateDescription::Found)
       continue;
 
+    if (EltDefinedInUseBB)
+      return nullptr;
+
     BasicBlock *Pred = It.first;
     Builder.SetInsertPoint(Pred, Pred->getTerminator()->getIterator());
     Value *V = PoisonValue::get(AggTy);
diff --git a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
index a75e9f1580ab96..a06f3006268a80 100644
--- a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
+++ b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
@@ -213,3 +213,40 @@ exit:
   %res = insertvalue { ptr, i64 } %tmp, i64 %val2, 1
   ret {ptr, i64} %res
 }
+
+; Negative test, %.elt2 is defined in bb %5, it can't be accessed from %3,
+; so we can't add insertvalue to %3.
+define { ptr, i64 } @test5({ ptr, i64 } %0, ptr %1, i1 %.not) {
+; CHECK-LABEL: define { ptr, i64 } @test5(
+; CHECK-SAME: { ptr, i64 } [[TMP0:%.*]], ptr [[TMP1:%.*]], i1 [[DOTNOT:%.*]]) {
+; CHECK-NEXT:    br i1 [[DOTNOT]], label %[[BB3:.*]], label %[[BB4:.*]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    store ptr null, ptr [[TMP1]], align 8
+; CHECK-NEXT:    br label %[[BB5:.*]]
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[DOTELT1:%.*]] = extractvalue { ptr, i64 } [[TMP0]], 0
+; CHECK-NEXT:    br label %[[BB5]]
+; CHECK:       [[BB5]]:
+; CHECK-NEXT:    [[TMP6:%.*]] = phi ptr [ [[DOTELT1]], %[[BB4]] ], [ null, %[[BB3]] ]
+; CHECK-NEXT:    [[DOTELT2:%.*]] = extractvalue { ptr, i64 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i64 } zeroinitializer, ptr [[TMP6]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertvalue { ptr, i64 } [[TMP7]], i64 [[DOTELT2]], 1
+; CHECK-NEXT:    ret { ptr, i64 } [[TMP8]]
+;
+  br i1 %.not, label %3, label %4
+
+3:                                                ; preds = %2
+  store ptr null, ptr %1, align 8
+  br label %5
+
+4:                                                ; preds = %2
+  %.elt1 = extractvalue { ptr, i64 } %0, 0
+  br label %5
+
+5:                                                ; preds = %4, %3
+  %6 = phi ptr [ %.elt1, %4 ], [ null, %3 ]
+  %.elt2 = extractvalue { ptr, i64 } %0, 1
+  %7 = insertvalue { ptr, i64 } zeroinitializer, ptr %6, 0
+  %8 = insertvalue { ptr, i64 } %7, i64 %.elt2, 1
+  ret { ptr, i64 } %8
+}

>From a2fee0f44b602b3321f22a6f8d740f515562467f Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Tue, 30 Jul 2024 16:17:12 -0700
Subject: [PATCH 3/8] Prevent add insertvalue to inner loops.

---
 .../InstCombine/InstCombineVectorOps.cpp      |  29 ++-
 .../fold-aggregate-reconstruction.ll          | 190 +++++++++++++++---
 2 files changed, 186 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index d48654263b3b20..dafa3948ae8939 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -1138,15 +1139,39 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
   if (!FoundSrcAgg)
     return nullptr;
 
-  // For predecessors without appropriate source aggregate, create one in the
-  // predecessor.
+  // Do some sanity check if we need to add insertvalue into predecessors.
+  Loop *OrigLoop, *UseLoop;
+  if (LI) {
+    OrigLoop = LI->getLoopFor(OrigIVI.getParent());
+    UseLoop = LI->getLoopFor(UseBB);
+  }
   for (auto &It : SourceAggregates) {
     if (Describe(It.second) == AggregateDescription::Found)
       continue;
 
+    // Element is defined in UseBB, so it can't be used in predecessors.
     if (EltDefinedInUseBB)
       return nullptr;
 
+    if (LI) {
+      // Don't add insertvalue into inner loops.
+      Loop *PredLoop = LI->getLoopFor(It.first);
+      if (OrigLoop != PredLoop && (!OrigLoop || OrigLoop->contains(PredLoop)))
+        return nullptr;
+
+      // Don't cross loop header.
+      if (UseLoop && UseLoop->getHeader() == UseBB &&
+          UseLoop->contains(It.first))
+        return nullptr;
+    }
+  }
+
+  // For predecessors without appropriate source aggregate, create one in the
+  // predecessor.
+  for (auto &It : SourceAggregates) {
+    if (Describe(It.second) == AggregateDescription::Found)
+      continue;
+
     BasicBlock *Pred = It.first;
     Builder.SetInsertPoint(Pred, Pred->getTerminator()->getIterator());
     Value *V = PoisonValue::get(AggTy);
diff --git a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
index a06f3006268a80..9753b4d234fb8b 100644
--- a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
+++ b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; RUN: opt -S -passes='instcombine<use-loop-info>' < %s | FileCheck %s
 
 declare {ptr, i64} @bar(i64)
 
@@ -214,39 +214,167 @@ exit:
   ret {ptr, i64} %res
 }
 
-; Negative test, %.elt2 is defined in bb %5, it can't be accessed from %3,
-; so we can't add insertvalue to %3.
-define { ptr, i64 } @test5({ ptr, i64 } %0, ptr %1, i1 %.not) {
+; Negative test, %.elt2 is defined in bb %end, it can't be accessed from %then,
+; so we can't add insertvalue to %then.
+define { ptr, i64 } @test5({ ptr, i64 } %src, ptr %pointer, i1 %cond) {
 ; CHECK-LABEL: define { ptr, i64 } @test5(
-; CHECK-SAME: { ptr, i64 } [[TMP0:%.*]], ptr [[TMP1:%.*]], i1 [[DOTNOT:%.*]]) {
-; CHECK-NEXT:    br i1 [[DOTNOT]], label %[[BB3:.*]], label %[[BB4:.*]]
-; CHECK:       [[BB3]]:
-; CHECK-NEXT:    store ptr null, ptr [[TMP1]], align 8
-; CHECK-NEXT:    br label %[[BB5:.*]]
-; CHECK:       [[BB4]]:
-; CHECK-NEXT:    [[DOTELT1:%.*]] = extractvalue { ptr, i64 } [[TMP0]], 0
-; CHECK-NEXT:    br label %[[BB5]]
-; CHECK:       [[BB5]]:
-; CHECK-NEXT:    [[TMP6:%.*]] = phi ptr [ [[DOTELT1]], %[[BB4]] ], [ null, %[[BB3]] ]
-; CHECK-NEXT:    [[DOTELT2:%.*]] = extractvalue { ptr, i64 } [[TMP0]], 1
+; CHECK-SAME: { ptr, i64 } [[SRC:%.*]], ptr [[POINTER:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    store ptr null, ptr [[POINTER]], align 8
+; CHECK-NEXT:    br label %[[END:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[DOTELT1:%.*]] = extractvalue { ptr, i64 } [[SRC]], 0
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    [[TMP6:%.*]] = phi ptr [ [[DOTELT1]], %[[ELSE]] ], [ null, %[[THEN]] ]
+; CHECK-NEXT:    [[DOTELT2:%.*]] = extractvalue { ptr, i64 } [[SRC]], 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertvalue { ptr, i64 } zeroinitializer, ptr [[TMP6]], 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertvalue { ptr, i64 } [[TMP7]], i64 [[DOTELT2]], 1
 ; CHECK-NEXT:    ret { ptr, i64 } [[TMP8]]
 ;
-  br i1 %.not, label %3, label %4
-
-3:                                                ; preds = %2
-  store ptr null, ptr %1, align 8
-  br label %5
-
-4:                                                ; preds = %2
-  %.elt1 = extractvalue { ptr, i64 } %0, 0
-  br label %5
-
-5:                                                ; preds = %4, %3
-  %6 = phi ptr [ %.elt1, %4 ], [ null, %3 ]
-  %.elt2 = extractvalue { ptr, i64 } %0, 1
-  %7 = insertvalue { ptr, i64 } zeroinitializer, ptr %6, 0
-  %8 = insertvalue { ptr, i64 } %7, i64 %.elt2, 1
-  ret { ptr, i64 } %8
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  store ptr null, ptr %pointer, align 8
+  br label %end
+
+else:
+  %.elt1 = extractvalue { ptr, i64 } %src, 0
+  br label %end
+
+end:
+  %elt = phi ptr [ %.elt1, %else ], [ null, %then ]
+  %.elt2 = extractvalue { ptr, i64 } %src, 1
+  %agg1 = insertvalue { ptr, i64 } zeroinitializer, ptr %elt, 0
+  %res = insertvalue { ptr, i64 } %agg1, i64 %.elt2, 1
+  ret { ptr, i64 } %res
+}
+
+; Negative test, we should not add insertvalue to inner loops.
+define { i64, ptr } @test6({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr } %agg3) {
+; CHECK-LABEL: define { i64, ptr } @test6(
+; CHECK-SAME: { i64, ptr } [[AGG1:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], { i64, ptr } [[AGG3:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[F10:%.*]] = extractvalue { i64, ptr } [[AGG1]], 0
+; CHECK-NEXT:    [[F11:%.*]] = extractvalue { i64, ptr } [[AGG1]], 1
+; CHECK-NEXT:    br label %[[HEADER:.*]]
+; CHECK:       [[HEADER]]:
+; CHECK-NEXT:    [[DOTSROA_01_0:%.*]] = phi i64 [ [[F10]], %[[ENTRY]] ], [ [[DOTSROA_01_1:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT:    [[DOTSROA_3_0:%.*]] = phi ptr [ [[F11]], %[[ENTRY]] ], [ [[DOTSROA_3_1:%.*]], %[[LATCH]] ]
+; CHECK-NEXT:    br i1 [[COND1]], label %[[CHECK:.*]], label %[[EXIT:.*]]
+; CHECK:       [[CHECK]]:
+; CHECK-NEXT:    br i1 [[COND2]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[F30:%.*]] = extractvalue { i64, ptr } [[AGG3]], 0
+; CHECK-NEXT:    [[F31:%.*]] = extractvalue { i64, ptr } [[AGG3]], 1
+; CHECK-NEXT:    br label %[[LATCH]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    br label %[[LATCH]]
+; CHECK:       [[LATCH]]:
+; CHECK-NEXT:    [[DOTSROA_01_1]] = phi i64 [ [[F30]], %[[THEN]] ], [ [[DOTSROA_01_0]], %[[ELSE]] ]
+; CHECK-NEXT:    [[DOTSROA_3_1]] = phi ptr [ [[F31]], %[[THEN]] ], [ [[DOTSROA_3_0]], %[[ELSE]] ]
+; CHECK-NEXT:    br label %[[HEADER]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue { i64, ptr } zeroinitializer, i64 [[DOTSROA_01_0]], 0
+; CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue { i64, ptr } [[DOTFCA_0_INSERT]], ptr [[DOTSROA_3_0]], 1
+; CHECK-NEXT:    ret { i64, ptr } [[DOTFCA_1_INSERT]]
+;
+entry:
+  %f10 = extractvalue { i64, ptr } %agg1, 0
+  %f11 = extractvalue { i64, ptr } %agg1, 1
+  br label %header
+
+header:
+  %.sroa.01.0 = phi i64 [ %f10, %entry ], [ %.sroa.01.1, %latch ]
+  %.sroa.3.0 = phi ptr [ %f11, %entry ], [ %.sroa.3.1, %latch ]
+  br i1 %cond1, label %check, label %exit
+
+check:
+  br i1 %cond2, label %then, label %else
+
+then:
+  %f30 = extractvalue { i64, ptr } %agg3, 0
+  %f31 = extractvalue { i64, ptr } %agg3, 1
+  br label %latch
+
+else:
+  br label %latch
+
+latch:
+  %.sroa.01.1 = phi i64 [ %f30, %then ], [ %.sroa.01.0, %else ]
+  %.sroa.3.1 = phi ptr [ %f31, %then ], [ %.sroa.3.0, %else ]
+  br label %header
+
+exit:
+  %.fca.0.insert = insertvalue { i64, ptr } zeroinitializer, i64 %.sroa.01.0, 0
+  %.fca.1.insert = insertvalue { i64, ptr } %.fca.0.insert, ptr %.sroa.3.0, 1
+  ret { i64, ptr } %.fca.1.insert
+}
+
+; Negative test, we should not add insertvalue cross loop header.
+define { i64, ptr } @test7({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr } %agg3) {
+; CHECK-LABEL: define { i64, ptr } @test7(
+; CHECK-SAME: { i64, ptr } [[AGG1:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], { i64, ptr } [[AGG3:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[F10:%.*]] = extractvalue { i64, ptr } [[AGG1]], 0
+; CHECK-NEXT:    [[F11:%.*]] = extractvalue { i64, ptr } [[AGG1]], 1
+; CHECK-NEXT:    br label %[[HEADER:.*]]
+; CHECK:       [[HEADER]]:
+; CHECK-NEXT:    [[DOTSROA_01_0:%.*]] = phi i64 [ [[F10]], %[[ENTRY]] ], [ [[DOTSROA_01_1:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT:    [[DOTSROA_3_0:%.*]] = phi ptr [ [[F11]], %[[ENTRY]] ], [ [[DOTSROA_3_1:%.*]], %[[LATCH]] ]
+; CHECK-NEXT:    [[FCA:%.*]] = phi { i64, ptr } [ [[AGG1]], %[[ENTRY]] ], [ [[F2_MERGED:%.*]], %[[LATCH]] ]
+; CHECK-NEXT:    br i1 [[COND1]], label %[[CHECK:.*]], label %[[EXIT:.*]]
+; CHECK:       [[CHECK]]:
+; CHECK-NEXT:    br i1 [[COND2]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[F30:%.*]] = extractvalue { i64, ptr } [[AGG3]], 0
+; CHECK-NEXT:    [[F31:%.*]] = extractvalue { i64, ptr } [[AGG3]], 1
+; CHECK-NEXT:    br label %[[LATCH]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[RIGHT0:%.*]] = insertvalue { i64, ptr } poison, i64 [[DOTSROA_01_0]], 0
+; CHECK-NEXT:    [[RIGHT1:%.*]] = insertvalue { i64, ptr } [[RIGHT0]], ptr [[DOTSROA_3_0]], 1
+; CHECK-NEXT:    br label %[[LATCH]]
+; CHECK:       [[LATCH]]:
+; CHECK-NEXT:    [[DOTSROA_01_1]] = phi i64 [ [[F30]], %[[THEN]] ], [ [[DOTSROA_01_0]], %[[ELSE]] ]
+; CHECK-NEXT:    [[DOTSROA_3_1]] = phi ptr [ [[F31]], %[[THEN]] ], [ [[DOTSROA_3_0]], %[[ELSE]] ]
+; CHECK-NEXT:    [[F2_MERGED]] = phi { i64, ptr } [ [[AGG3]], %[[THEN]] ], [ [[RIGHT1]], %[[ELSE]] ]
+; CHECK-NEXT:    br label %[[HEADER]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret { i64, ptr } [[FCA]]
+;
+entry:
+  %f10 = extractvalue { i64, ptr } %agg1, 0
+  %f11 = extractvalue { i64, ptr } %agg1, 1
+  br label %header
+
+header:
+  %.sroa.01.0 = phi i64 [ %f10, %entry ], [ %.sroa.01.1, %latch ]
+  %.sroa.3.0 = phi ptr [ %f11, %entry ], [ %.sroa.3.1, %latch ]
+  %fca = phi { i64, ptr } [ %agg1, %entry ], [ %f2.merged, %latch ]
+  br i1 %cond1, label %check, label %exit
+
+check:
+  br i1 %cond2, label %then, label %else
+
+then:
+  %f30 = extractvalue { i64, ptr } %agg3, 0
+  %f31 = extractvalue { i64, ptr } %agg3, 1
+  br label %latch
+
+else:
+  %right0 = insertvalue { i64, ptr } poison, i64 %.sroa.01.0, 0
+  %right1 = insertvalue { i64, ptr } %right0, ptr %.sroa.3.0, 1
+  br label %latch
+
+latch:
+  %.sroa.01.1 = phi i64 [ %f30, %then ], [ %.sroa.01.0, %else ]
+  %.sroa.3.1 = phi ptr [ %f31, %then ], [ %.sroa.3.0, %else ]
+  %f2.merged = phi { i64, ptr } [ %agg3, %then ], [ %right1, %else ]
+  br label %header
+
+exit:
+  ret { i64, ptr } %fca
 }

>From d3d61cda897c27b5e8e1e0d2857950cf6c50af32 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Thu, 22 Aug 2024 16:24:16 -0700
Subject: [PATCH 4/8] Use control flow and profile information to avoid
 infinite loop.

---
 .../InstCombine/InstCombineVectorOps.cpp      | 30 +++++-----
 .../fold-aggregate-reconstruction.ll          | 58 +++++++++++++------
 2 files changed, 54 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index dafa3948ae8939..696560bd708040 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -19,8 +19,8 @@
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -1140,11 +1140,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
     return nullptr;
 
   // Do some sanity check if we need to add insertvalue into predecessors.
-  Loop *OrigLoop, *UseLoop;
-  if (LI) {
-    OrigLoop = LI->getLoopFor(OrigIVI.getParent());
-    UseLoop = LI->getLoopFor(UseBB);
-  }
+  auto OrigBB = OrigIVI.getParent();
   for (auto &It : SourceAggregates) {
     if (Describe(It.second) == AggregateDescription::Found)
       continue;
@@ -1153,17 +1149,19 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
     if (EltDefinedInUseBB)
       return nullptr;
 
-    if (LI) {
-      // Don't add insertvalue into inner loops.
-      Loop *PredLoop = LI->getLoopFor(It.first);
-      if (OrigLoop != PredLoop && (!OrigLoop || OrigLoop->contains(PredLoop)))
-        return nullptr;
+    // Do this transformation cross loop boundary may cause dead loop. So we
+    // should avoid this situation. But LoopInfo is not generally available, we
+    // must be conservative here.
+    // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
+    // can't be in inner loop.
+    if (UseBB == OrigBB)
+      continue;
 
-      // Don't cross loop header.
-      if (UseLoop && UseLoop->getHeader() == UseBB &&
-          UseLoop->contains(It.first))
-        return nullptr;
-    }
+    // It is always safe to move InsertValue to colder places.
+    if (BFI && BFI->getBlockFreq(OrigBB) > BFI->getBlockFreq(It.first))
+      continue;
+
+    return nullptr;
   }
 
   // For predecessors without appropriate source aggregate, create one in the
diff --git a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
index 9753b4d234fb8b..ff0e37ff2c89de 100644
--- a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
+++ b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes='instcombine<use-loop-info>' < %s | FileCheck %s
+; RUN: opt -S -passes='require<profile-summary>,instcombine' < %s | FileCheck %s
 
 declare {ptr, i64} @bar(i64)
 
@@ -314,10 +314,10 @@ exit:
   ret { i64, ptr } %.fca.1.insert
 }
 
-; Negative test, we should not add insertvalue cross loop header.
-define { i64, ptr } @test7({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr } %agg3) {
+; With profile information we can add insertvalue into colder block, even if it is in inner loop.
+define { i64, ptr } @test7({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr } %agg3) !prof !52 {
 ; CHECK-LABEL: define { i64, ptr } @test7(
-; CHECK-SAME: { i64, ptr } [[AGG1:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], { i64, ptr } [[AGG3:%.*]]) {
+; CHECK-SAME: { i64, ptr } [[AGG1:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], { i64, ptr } [[AGG3:%.*]]) !prof [[PROF14:![0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[F10:%.*]] = extractvalue { i64, ptr } [[AGG1]], 0
 ; CHECK-NEXT:    [[F11:%.*]] = extractvalue { i64, ptr } [[AGG1]], 1
@@ -325,8 +325,8 @@ define { i64, ptr } @test7({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr
 ; CHECK:       [[HEADER]]:
 ; CHECK-NEXT:    [[DOTSROA_01_0:%.*]] = phi i64 [ [[F10]], %[[ENTRY]] ], [ [[DOTSROA_01_1:%.*]], %[[LATCH:.*]] ]
 ; CHECK-NEXT:    [[DOTSROA_3_0:%.*]] = phi ptr [ [[F11]], %[[ENTRY]] ], [ [[DOTSROA_3_1:%.*]], %[[LATCH]] ]
-; CHECK-NEXT:    [[FCA:%.*]] = phi { i64, ptr } [ [[AGG1]], %[[ENTRY]] ], [ [[F2_MERGED:%.*]], %[[LATCH]] ]
-; CHECK-NEXT:    br i1 [[COND1]], label %[[CHECK:.*]], label %[[EXIT:.*]]
+; CHECK-NEXT:    [[DOTFCA_1_INSERT_MERGED:%.*]] = phi { i64, ptr } [ [[AGG1]], %[[ENTRY]] ], [ [[DOTMERGED:%.*]], %[[LATCH]] ]
+; CHECK-NEXT:    br i1 [[COND1]], label %[[CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF15:![0-9]+]]
 ; CHECK:       [[CHECK]]:
 ; CHECK-NEXT:    br i1 [[COND2]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
@@ -334,16 +334,16 @@ define { i64, ptr } @test7({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr
 ; CHECK-NEXT:    [[F31:%.*]] = extractvalue { i64, ptr } [[AGG3]], 1
 ; CHECK-NEXT:    br label %[[LATCH]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[RIGHT0:%.*]] = insertvalue { i64, ptr } poison, i64 [[DOTSROA_01_0]], 0
-; CHECK-NEXT:    [[RIGHT1:%.*]] = insertvalue { i64, ptr } [[RIGHT0]], ptr [[DOTSROA_3_0]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { i64, ptr } poison, i64 [[DOTSROA_01_0]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i64, ptr } [[TMP0]], ptr [[DOTSROA_3_0]], 1
 ; CHECK-NEXT:    br label %[[LATCH]]
 ; CHECK:       [[LATCH]]:
-; CHECK-NEXT:    [[DOTSROA_01_1]] = phi i64 [ [[F30]], %[[THEN]] ], [ [[DOTSROA_01_0]], %[[ELSE]] ]
-; CHECK-NEXT:    [[DOTSROA_3_1]] = phi ptr [ [[F31]], %[[THEN]] ], [ [[DOTSROA_3_0]], %[[ELSE]] ]
-; CHECK-NEXT:    [[F2_MERGED]] = phi { i64, ptr } [ [[AGG3]], %[[THEN]] ], [ [[RIGHT1]], %[[ELSE]] ]
+; CHECK-NEXT:    [[DOTSROA_01_1]] = phi i64 [ [[DOTSROA_01_0]], %[[ELSE]] ], [ [[F30]], %[[THEN]] ]
+; CHECK-NEXT:    [[DOTSROA_3_1]] = phi ptr [ [[DOTSROA_3_0]], %[[ELSE]] ], [ [[F31]], %[[THEN]] ]
+; CHECK-NEXT:    [[DOTMERGED]] = phi { i64, ptr } [ [[TMP1]], %[[ELSE]] ], [ [[AGG3]], %[[THEN]] ]
 ; CHECK-NEXT:    br label %[[HEADER]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    ret { i64, ptr } [[FCA]]
+; CHECK-NEXT:    ret { i64, ptr } [[DOTFCA_1_INSERT_MERGED]]
 ;
 entry:
   %f10 = extractvalue { i64, ptr } %agg1, 0
@@ -353,8 +353,7 @@ entry:
 header:
   %.sroa.01.0 = phi i64 [ %f10, %entry ], [ %.sroa.01.1, %latch ]
   %.sroa.3.0 = phi ptr [ %f11, %entry ], [ %.sroa.3.1, %latch ]
-  %fca = phi { i64, ptr } [ %agg1, %entry ], [ %f2.merged, %latch ]
-  br i1 %cond1, label %check, label %exit
+  br i1 %cond1, label %check, label %exit, !prof !100
 
 check:
   br i1 %cond2, label %then, label %else
@@ -365,16 +364,39 @@ then:
   br label %latch
 
 else:
-  %right0 = insertvalue { i64, ptr } poison, i64 %.sroa.01.0, 0
-  %right1 = insertvalue { i64, ptr } %right0, ptr %.sroa.3.0, 1
   br label %latch
 
 latch:
   %.sroa.01.1 = phi i64 [ %f30, %then ], [ %.sroa.01.0, %else ]
   %.sroa.3.1 = phi ptr [ %f31, %then ], [ %.sroa.3.0, %else ]
-  %f2.merged = phi { i64, ptr } [ %agg3, %then ], [ %right1, %else ]
   br label %header
 
 exit:
-  ret { i64, ptr } %fca
+  %.fca.0.insert = insertvalue { i64, ptr } zeroinitializer, i64 %.sroa.01.0, 0
+  %.fca.1.insert = insertvalue { i64, ptr } %.fca.0.insert, ptr %.sroa.3.0, 1
+  ret { i64, ptr } %.fca.1.insert
 }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+; The threshold for hot count is 100
+
+!52 = !{!"function_entry_count", i64 1000}
+!100 = !{!"branch_weights", i32 1, i32 100}
+;.
+; CHECK: [[PROF14]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF15]] = !{!"branch_weights", i32 1, i32 100}
+;.

>From 4dcfcb367fa6c2c435b803798a702c7e22c3da01 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Wed, 18 Sep 2024 11:12:09 -0700
Subject: [PATCH 5/8] Remove the usage of BFI

BFI in InstCombine has limited usage, it's usually not available, so
remove it. Also avoid constructing constant aggregate.
---
 .../InstCombine/InstCombineVectorOps.cpp      |  26 ++--
 .../fold-aggregate-reconstruction.ll          | 111 +++++-------------
 2 files changed, 48 insertions(+), 89 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 696560bd708040..1537a46000079b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1131,8 +1131,9 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
     } else {
       // If UseBB is the single successor of Pred, we can add InsertValue to
       // Pred.
-      if (succ_size(Pred) != 1)
-        return nullptr; // Give up.
+      auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
+      if (!(BI && BI->isUnconditional()))
+        return nullptr;
     }
   }
 
@@ -1154,14 +1155,21 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
     // must be conservative here.
     // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
     // can't be in inner loop.
-    if (UseBB == OrigBB)
-      continue;
-
-    // It is always safe to move InsertValue to colder places.
-    if (BFI && BFI->getBlockFreq(OrigBB) > BFI->getBlockFreq(It.first))
-      continue;
+    if (UseBB != OrigBB)
+      return nullptr;
 
-    return nullptr;
+    // Avoid constructing constant aggregate because constant value may expose
+    // more optimizations.
+    bool ConstAgg = true;
+    for (auto I : enumerate(AggElts)) {
+      Value *Elt = (*I.value())->DoPHITranslation(UseBB, It.first);
+      if (!isa<Constant>(Elt)) {
+        ConstAgg = false;
+        break;
+      }
+    }
+    if (ConstAgg)
+      return nullptr;
   }
 
   // For predecessors without appropriate source aggregate, create one in the
diff --git a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
index ff0e37ff2c89de..eb5f96d8f942d3 100644
--- a/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
+++ b/llvm/test/Transforms/InstCombine/fold-aggregate-reconstruction.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes='require<profile-summary>,instcombine' < %s | FileCheck %s
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
 
 declare {ptr, i64} @bar(i64)
 
@@ -314,89 +314,40 @@ exit:
   ret { i64, ptr } %.fca.1.insert
 }
 
-; With profile information we can add insertvalue into colder block, even if it is in inner loop.
-define { i64, ptr } @test7({ i64, ptr } %agg1, i1 %cond1, i1 %cond2, { i64, ptr } %agg3) !prof !52 {
-; CHECK-LABEL: define { i64, ptr } @test7(
-; CHECK-SAME: { i64, ptr } [[AGG1:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], { i64, ptr } [[AGG3:%.*]]) !prof [[PROF14:![0-9]+]] {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[F10:%.*]] = extractvalue { i64, ptr } [[AGG1]], 0
-; CHECK-NEXT:    [[F11:%.*]] = extractvalue { i64, ptr } [[AGG1]], 1
-; CHECK-NEXT:    br label %[[HEADER:.*]]
-; CHECK:       [[HEADER]]:
-; CHECK-NEXT:    [[DOTSROA_01_0:%.*]] = phi i64 [ [[F10]], %[[ENTRY]] ], [ [[DOTSROA_01_1:%.*]], %[[LATCH:.*]] ]
-; CHECK-NEXT:    [[DOTSROA_3_0:%.*]] = phi ptr [ [[F11]], %[[ENTRY]] ], [ [[DOTSROA_3_1:%.*]], %[[LATCH]] ]
-; CHECK-NEXT:    [[DOTFCA_1_INSERT_MERGED:%.*]] = phi { i64, ptr } [ [[AGG1]], %[[ENTRY]] ], [ [[DOTMERGED:%.*]], %[[LATCH]] ]
-; CHECK-NEXT:    br i1 [[COND1]], label %[[CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF15:![0-9]+]]
-; CHECK:       [[CHECK]]:
-; CHECK-NEXT:    br i1 [[COND2]], label %[[THEN:.*]], label %[[ELSE:.*]]
-; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[F30:%.*]] = extractvalue { i64, ptr } [[AGG3]], 0
-; CHECK-NEXT:    [[F31:%.*]] = extractvalue { i64, ptr } [[AGG3]], 1
-; CHECK-NEXT:    br label %[[LATCH]]
-; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { i64, ptr } poison, i64 [[DOTSROA_01_0]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i64, ptr } [[TMP0]], ptr [[DOTSROA_3_0]], 1
-; CHECK-NEXT:    br label %[[LATCH]]
-; CHECK:       [[LATCH]]:
-; CHECK-NEXT:    [[DOTSROA_01_1]] = phi i64 [ [[DOTSROA_01_0]], %[[ELSE]] ], [ [[F30]], %[[THEN]] ]
-; CHECK-NEXT:    [[DOTSROA_3_1]] = phi ptr [ [[DOTSROA_3_0]], %[[ELSE]] ], [ [[F31]], %[[THEN]] ]
-; CHECK-NEXT:    [[DOTMERGED]] = phi { i64, ptr } [ [[TMP1]], %[[ELSE]] ], [ [[AGG3]], %[[THEN]] ]
-; CHECK-NEXT:    br label %[[HEADER]]
+; Negative test, don't construct constant aggregate.
+define {ptr, i64} @test7(i1 %cond1, ptr %p1) {
+; CHECK-LABEL: define { ptr, i64 } @test7(
+; CHECK-SAME: i1 [[COND1:%.*]], ptr [[P1:%.*]]) {
+; CHECK-NEXT:    br i1 [[COND1]], label %[[BBB1:.*]], label %[[BBB2:.*]]
+; CHECK:       [[BBB1]]:
+; CHECK-NEXT:    [[CALL1:%.*]] = call { ptr, i64 } @bar(i64 0)
+; CHECK-NEXT:    [[VAL11:%.*]] = extractvalue { ptr, i64 } [[CALL1]], 0
+; CHECK-NEXT:    [[VAL12:%.*]] = extractvalue { ptr, i64 } [[CALL1]], 1
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[BBB2]]:
+; CHECK-NEXT:    br label %[[EXIT]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    ret { i64, ptr } [[DOTFCA_1_INSERT_MERGED]]
+; CHECK-NEXT:    [[VAL1:%.*]] = phi ptr [ [[VAL11]], %[[BBB1]] ], [ undef, %[[BBB2]] ]
+; CHECK-NEXT:    [[VAL2:%.*]] = phi i64 [ [[VAL12]], %[[BBB1]] ], [ 1, %[[BBB2]] ]
+; CHECK-NEXT:    [[TMP:%.*]] = insertvalue { ptr, i64 } poison, ptr [[VAL1]], 0
+; CHECK-NEXT:    [[RES:%.*]] = insertvalue { ptr, i64 } [[TMP]], i64 [[VAL2]], 1
+; CHECK-NEXT:    ret { ptr, i64 } [[RES]]
 ;
-entry:
-  %f10 = extractvalue { i64, ptr } %agg1, 0
-  %f11 = extractvalue { i64, ptr } %agg1, 1
-  br label %header
-
-header:
-  %.sroa.01.0 = phi i64 [ %f10, %entry ], [ %.sroa.01.1, %latch ]
-  %.sroa.3.0 = phi ptr [ %f11, %entry ], [ %.sroa.3.1, %latch ]
-  br i1 %cond1, label %check, label %exit, !prof !100
-
-check:
-  br i1 %cond2, label %then, label %else
-
-then:
-  %f30 = extractvalue { i64, ptr } %agg3, 0
-  %f31 = extractvalue { i64, ptr } %agg3, 1
-  br label %latch
+  br i1 %cond1, label %bbb1, label %bbb2
 
-else:
-  br label %latch
+bbb1:
+  %call1 = call {ptr, i64} @bar(i64 0)
+  %val11 = extractvalue { ptr, i64 } %call1, 0
+  %val12 = extractvalue { ptr, i64 } %call1, 1
+  br label %exit
 
-latch:
-  %.sroa.01.1 = phi i64 [ %f30, %then ], [ %.sroa.01.0, %else ]
-  %.sroa.3.1 = phi ptr [ %f31, %then ], [ %.sroa.3.0, %else ]
-  br label %header
+bbb2:
+  br label %exit
 
 exit:
-  %.fca.0.insert = insertvalue { i64, ptr } zeroinitializer, i64 %.sroa.01.0, 0
-  %.fca.1.insert = insertvalue { i64, ptr } %.fca.0.insert, ptr %.sroa.3.0, 1
-  ret { i64, ptr } %.fca.1.insert
+  %val1 = phi ptr [%val11, %bbb1], [undef, %bbb2]
+  %val2 = phi i64 [%val12, %bbb1], [1, %bbb2]
+  %tmp = insertvalue { ptr, i64 } poison, ptr %val1, 0
+  %res = insertvalue { ptr, i64 } %tmp, i64 %val2, 1
+  ret {ptr, i64} %res
 }
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"ProfileSummary", !1}
-!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
-!2 = !{!"ProfileFormat", !"InstrProf"}
-!3 = !{!"TotalCount", i64 10000}
-!4 = !{!"MaxCount", i64 10}
-!5 = !{!"MaxInternalCount", i64 1}
-!6 = !{!"MaxFunctionCount", i64 1000}
-!7 = !{!"NumCounts", i64 3}
-!8 = !{!"NumFunctions", i64 3}
-!9 = !{!"DetailedSummary", !10}
-!10 = !{!11, !12, !13}
-!11 = !{i32 10000, i64 100, i32 1}
-!12 = !{i32 999000, i64 100, i32 1}
-!13 = !{i32 999999, i64 1, i32 2}
-; The threshold for hot count is 100
-
-!52 = !{!"function_entry_count", i64 1000}
-!100 = !{!"branch_weights", i32 1, i32 100}
-;.
-; CHECK: [[PROF14]] = !{!"function_entry_count", i64 1000}
-; CHECK: [[PROF15]] = !{!"branch_weights", i32 1, i32 100}
-;.

>From 8f5d251e4348391a510aa62894379027db9da019 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Wed, 2 Oct 2024 15:41:43 -0700
Subject: [PATCH 6/8] Change the condition expression.

---
 llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 7adc2b8fc4fd04..6967ac781af222 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -19,7 +19,6 @@
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/BasicBlock.h"
@@ -1132,7 +1131,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
       // If UseBB is the single successor of Pred, we can add InsertValue to
       // Pred.
       auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
-      if (!(BI && BI->isUnconditional()))
+      if (!BI || !BI->isUnconditional())
         return nullptr;
     }
   }

>From aef54025298b496d4f0b8766816700659b2f9015 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Fri, 18 Oct 2024 11:49:13 -0700
Subject: [PATCH 7/8] Do the transform     extract (select (cond, insert(agg,
 elem), FV))  ->  select (cond, elem, extract(FV))

---
 .../InstCombine/InstructionCombining.cpp      | 20 +++++++++++-
 .../InstCombine/extract-select-agg.ll         | 31 ++++++++++++++-----
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 916a14e13ff2af..d33da103778735 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1692,7 +1692,25 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
 
   Value *TV = SI->getTrueValue();
   Value *FV = SI->getFalseValue();
-  if (!(isa<Constant>(TV) || isa<Constant>(FV)))
+  if ((Op.getOpcode() == Instruction::ExtractValue) &&
+      (isa<InsertValueInst>(TV) || isa<InsertValueInst>(FV))) {
+    // extract (select (cond, insert(agg, elem), FV))
+    // -> select (cond, elem, extract(FV))
+    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&Op);
+    Value *NewTV = simplifyExtractValueInst(TV, EV->getIndices(),
+                                            SQ.getWithInstruction(&Op));
+    Value *NewFV = simplifyExtractValueInst(FV, EV->getIndices(),
+                                            SQ.getWithInstruction(&Op));
+    if (!NewTV && !NewFV)
+      return nullptr;
+    Builder.SetInsertPoint(SI);
+    if (!NewTV)
+      NewTV = Builder.CreateExtractValue(TV, EV->getIndices());
+    if (!NewFV)
+      NewFV = Builder.CreateExtractValue(FV, EV->getIndices());
+    return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr,
+                              SI);
+  } else if (!(isa<Constant>(TV) || isa<Constant>(FV)))
     return nullptr;
 
   // Bool selects with constant operands can be folded to logical ops.
diff --git a/llvm/test/Transforms/InstCombine/extract-select-agg.ll b/llvm/test/Transforms/InstCombine/extract-select-agg.ll
index 6ba6b1a575601d..2813870feeb931 100644
--- a/llvm/test/Transforms/InstCombine/extract-select-agg.ll
+++ b/llvm/test/Transforms/InstCombine/extract-select-agg.ll
@@ -56,14 +56,9 @@ define void @test_select_agg_multiuse(i1 %cond, i64 %v1, i64 %v2, i64 %v3, i64 %
 ; CHECK-LABEL: define void @test_select_agg_multiuse(
 ; CHECK-SAME: i1 [[COND:%.*]], i64 [[V1:%.*]], i64 [[V2:%.*]], i64 [[V3:%.*]], i64 [[V4:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A0:%.*]] = insertvalue { i64, i64 } poison, i64 [[V1]], 0
-; CHECK-NEXT:    [[A1:%.*]] = insertvalue { i64, i64 } [[A0]], i64 [[V2]], 1
-; CHECK-NEXT:    [[B0:%.*]] = insertvalue { i64, i64 } poison, i64 [[V3]], 0
-; CHECK-NEXT:    [[B1:%.*]] = insertvalue { i64, i64 } [[B0]], i64 [[V4]], 1
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], { i64, i64 } [[A1]], { i64, i64 } [[B1]]
-; CHECK-NEXT:    [[X:%.*]] = extractvalue { i64, i64 } [[SEL]], 0
+; CHECK-NEXT:    [[X:%.*]] = select i1 [[COND]], i64 [[V1]], i64 [[V3]]
 ; CHECK-NEXT:    call void @use(i64 [[X]])
-; CHECK-NEXT:    [[Y:%.*]] = extractvalue { i64, i64 } [[SEL]], 1
+; CHECK-NEXT:    [[Y:%.*]] = select i1 [[COND]], i64 [[V2]], i64 [[V4]]
 ; CHECK-NEXT:    call void @use(i64 [[Y]])
 ; CHECK-NEXT:    ret void
 ;
@@ -81,3 +76,25 @@ entry:
 }
 
 declare void @use(i64)
+
+define i64 @test_extract_select_insert(ptr %p1, i64 %v) {
+; CHECK-LABEL: define i64 @test_extract_select_insert(
+; CHECK-SAME: ptr [[P1:%.*]], i64 [[V:%.*]]) {
+; CHECK-NEXT:    [[CALL:%.*]] = call { ptr, i64 } @foo()
+; CHECK-NEXT:    [[ELM1:%.*]] = extractvalue { ptr, i64 } [[CALL]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[ELM1]], [[V]]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { ptr, i64 } [[CALL]], 1
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i64 4294967294, i64 [[TMP1]]
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+  %call = call { ptr, i64 } @foo()
+  %elm1 = extractvalue { ptr, i64 } %call, 1
+  %cmp = icmp eq i64 %elm1, %v
+  %fca0 = insertvalue { ptr, i64 } poison, ptr %p1, 0
+  %fca1 = insertvalue { ptr, i64 } %fca0, i64 4294967294, 1
+  %select = select i1 %cmp, { ptr, i64 } %fca1, { ptr, i64 } %call
+  %res = extractvalue { ptr, i64 } %select, 1
+  ret i64 %res
+}
+
+declare { ptr, i64 } @foo()

>From 9cd86255f53c2696161360139ae8ee4eea57412c Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Fri, 8 Nov 2024 11:07:03 -0800
Subject: [PATCH 8/8] Change the loop iterator.

---
 .../Transforms/InstCombine/InstCombineVectorOps.cpp    | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 6967ac781af222..55eff1d5cd71af 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1160,8 +1160,8 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
     // Avoid constructing constant aggregate because constant value may expose
     // more optimizations.
     bool ConstAgg = true;
-    for (auto I : enumerate(AggElts)) {
-      Value *Elt = (*I.value())->DoPHITranslation(UseBB, It.first);
+    for (auto [Idx, Val] : enumerate(AggElts)) {
+      Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);
       if (!isa<Constant>(Elt)) {
         ConstAgg = false;
         break;
@@ -1180,9 +1180,9 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
     BasicBlock *Pred = It.first;
     Builder.SetInsertPoint(Pred, Pred->getTerminator()->getIterator());
     Value *V = PoisonValue::get(AggTy);
-    for (auto I : enumerate(AggElts)) {
-      Value *Elt = (*I.value())->DoPHITranslation(UseBB, Pred);
-      V = Builder.CreateInsertValue(V, Elt, I.index());
+    for (auto [Idx, Val] : enumerate(AggElts)) {
+      Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);
+      V = Builder.CreateInsertValue(V, Elt, Idx);
     }
 
     It.second = V;



More information about the llvm-commits mailing list