[llvm] [InstCombinePHI] Enhance PHI CSE to remove redundant phis (PR #163453)

Tue Oct 21 09:32:00 PDT 2025

https://github.com/CongzheUalberta updated https://github.com/llvm/llvm-project/pull/163453

>From 471578931f5a013abf0d31f60829fd771f93c2c9 Mon Sep 17 00:00:00 2001
From: Congzhe Cao <congzhe.cao at huawei.com>
Date: Tue, 14 Oct 2025 17:04:54 -0400
Subject: [PATCH 1/5] [InstCombinePHI] Enhance PHI CSE to remove redundant phis

Enhanced PHI CSE to eliminate redundant PHIs, which could clean up
the IR and open up opportunities for other passes such as loop
vectorization.
---
 .../Transforms/InstCombine/InstCombinePHI.cpp | 89 +++++++++++++++++--
 .../InstCombine/enhanced-phi-cse.ll           | 61 +++++++++++++
 2 files changed, 145 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 9815644f5f43d..e736e89a3a146 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1621,11 +1621,90 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
     // Note that even though we've just canonicalized this PHI, due to the
     // worklist visitation order, there are no guarantess that *every* PHI
     // has been canonicalized, so we can't just compare operands ranges.
-    if (!PN.isIdenticalToWhenDefined(&IdenticalPN))
-      continue;
-    // Just use that PHI instead then.
-    ++NumPHICSEs;
-    return replaceInstUsesWith(PN, &IdenticalPN);
+    if (PN.isIdenticalToWhenDefined(&IdenticalPN)) {
+      // Just use that PHI instead then.
+      ++NumPHICSEs;
+      return replaceInstUsesWith(PN, &IdenticalPN);
+    }
+
+    // Look for the following pattern and do PHI CSE to clean up the
+    // redundant %phi. Here %phi, %1 and %phi.next perform the same
+    // functionality as %identicalPhi and hence %phi can be eliminated.
+    //
+    // BB1:
+    //   %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ]
+    //   %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ]
+    //   ...
+    //   %identicalPhi.next = select %cmp, %val, %identicalPhi
+    //   %1 = select %cmp2, %identicalPhi, float %phi
+    //   %phi.next = select %cmp, %val, %1
+    //
+    // Prove that %phi and %identicalPhi are the same by induction:
+    //
+    // Base case: Both %phi and %identicalPhi are equal on entry to the loop.
+    // Inductive case:
+    // Suppose %phi and %identicalPhi are equal at iteration i.
+    // We look at their values at iteration i+1 which are %phi.next and
+    // %identicalPhi.next. They would have become different only when %cmp is
+    // false and the corresponding values %1 and %identicalPhi differ.
+    //
+    // The only condition when %1 and %identicalPh could differ is when %cmp2
+    // is false and %1 is %phi, which contradicts our inductive hypothesis
+    // that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are
+    // always equal at iteration i+1.
+
+    if (PN.getNumIncomingValues() == 2 && PN.getNumUses() == 1) {
+      unsigned diffVals = 0;
+      unsigned diffValIdx = 0;
+      // Check that only the backedge incoming value is different.
+      for (unsigned i = 0; i < 2; i++) {
+        if (PN.getIncomingValue(i) != IdenticalPN.getIncomingValue(i)) {
+          diffVals++;
+          diffValIdx = i;
+        }
+      }
+      BasicBlock *CurBB = PN.getParent();
+      if (diffVals == 2 || PN.getIncomingBlock(diffValIdx) != CurBB)
+        continue;
+      // Now check that the backedge incoming values are two select
+      // instructions that are in the same BB, and have the same condition,
+      // true value.
+      auto *Val = PN.getIncomingValue(diffValIdx);
+      auto *IdenticalVal = IdenticalPN.getIncomingValue(diffValIdx);
+      if (!isa<SelectInst>(Val) || !isa<SelectInst>(IdenticalVal))
+        continue;
+
+      auto *SI = cast<SelectInst>(Val);
+      auto *IdenticalSI = cast<SelectInst>(IdenticalVal);
+      if (SI->getParent() != CurBB || IdenticalSI->getParent() != CurBB)
+        continue;
+      if (SI->getCondition() != IdenticalSI->getCondition() ||
+          SI->getTrueValue() != IdenticalSI->getTrueValue())
+        continue;
+
+      // Now check that the false values, i.e., %1 and %identicalPhi,
+      // are essentially the same value within the same BB.
+      auto SameSelAndPhi = [&](SelectInst *SI, PHINode *IdenticalPN,
+                               PHINode *PN) {
+        if (SI->getTrueValue() == IdenticalPN) {
+          return SI->getFalseValue() == PN;
+        }
+        return false;
+      };
+      auto *FalseVal = SI->getFalseValue();
+      auto *IdenticalSIFalseVal =
+          dyn_cast<PHINode>(IdenticalSI->getFalseValue());
+      if (!isa<SelectInst>(FalseVal) || !IdenticalSIFalseVal ||
+          IdenticalSIFalseVal != &IdenticalPN)
+        continue;
+      auto *FalseValSI = cast<SelectInst>(FalseVal);
+      if (FalseValSI->getParent() != CurBB ||
+          !SameSelAndPhi(FalseValSI, &IdenticalPN, &PN))
+        continue;
+
+      ++NumPHICSEs;
+      return replaceInstUsesWith(PN, &IdenticalPN);
+    }
   }
 
   // If this is an integer PHI and we know that it has an illegal type, see if
diff --git a/llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll b/llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll
new file mode 100644
index 0000000000000..79cb59ea8bbbc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes=instcombine | FileCheck %s
+@A = extern_weak global float, align 4
+
+; %phi.to.remove acts the same as %v1, and can be eliminated with PHI CSE.
+define void @enhanced_phi_cse(ptr %m, ptr %n, i32 %count) {
+; CHECK-LABEL: @enhanced_phi_cse(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
+; CHECK-NEXT:    [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT:    [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
+; CHECK-NEXT:    [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
+; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
+; CHECK-NEXT:    [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]]
+; CHECK-NEXT:    [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
+; CHECK-NEXT:    [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                    ; preds = %entry, %for.body
+  %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
+  %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+  %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+  %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+  %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
+  %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
+  %q.load = load float, ptr %q
+  %c.load = load float, ptr %c
+  %sub = fsub float %q.load, %c.load
+  %cmp1 = fcmp olt float %sub, %v0
+  %v0.1 = select i1 %cmp1, float %sub, float %v0
+  %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+  %cmp2 = fcmp ogt float  %sub, %same.as.v1
+  %v1.1 = select i1 %cmp2, float %sub, float %v1
+  %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1
+  %inc.i = add nuw nsw i32 %i, 1
+  %q.next = getelementptr inbounds i8, ptr %q, i64 4
+  %c.next = getelementptr inbounds i8, ptr %c, i64 4
+  %exitcond = icmp eq i32 %inc.i, %count
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+  store float %vl.1.lcssa, ptr @A
+  ret void
+}

>From fbc4a5f098e6f1e5f96ba8a1f66a1446dd789f1b Mon Sep 17 00:00:00 2001
From: Congzhe Cao <congzhe.cao at huawei.com>
Date: Wed, 15 Oct 2025 19:03:25 -0400
Subject: [PATCH 2/5] address reviewer's comments

---
 .../Transforms/InstCombine/InstCombinePHI.cpp | 63 ++++++++++++-------
 .../InstCombine/enhanced-phi-cse.ll           | 60 ++++++++++++++++++
 2 files changed, 99 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index e736e89a3a146..ffb81d07eef0b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1636,8 +1636,10 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
     //   %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ]
     //   ...
     //   %identicalPhi.next = select %cmp, %val, %identicalPhi
-    //   %1 = select %cmp2, %identicalPhi, float %phi
+    //                      (or select %cmp, %identicalPhi, %val)
+    //   %1 = select %cmp2, %identicalPhi, %phi
     //   %phi.next = select %cmp, %val, %1
+    //             (or select %cmp, %1, %val)
     //
     // Prove that %phi and %identicalPhi are the same by induction:
     //
@@ -1646,43 +1648,58 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
     // Suppose %phi and %identicalPhi are equal at iteration i.
     // We look at their values at iteration i+1 which are %phi.next and
     // %identicalPhi.next. They would have become different only when %cmp is
-    // false and the corresponding values %1 and %identicalPhi differ.
+    // false and the corresponding values %1 and %identicalPhi differ
+    // (similar reason for the other "or" case in the bracket).
     //
     // The only condition when %1 and %identicalPh could differ is when %cmp2
     // is false and %1 is %phi, which contradicts our inductive hypothesis
     // that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are
     // always equal at iteration i+1.
 
-    if (PN.getNumIncomingValues() == 2 && PN.getNumUses() == 1) {
-      unsigned diffVals = 0;
-      unsigned diffValIdx = 0;
+    if (PN.getNumIncomingValues() == 2) {
+      unsigned DiffVals = 0;
+      BasicBlock *DiffValBB = nullptr;
       // Check that only the backedge incoming value is different.
       for (unsigned i = 0; i < 2; i++) {
-        if (PN.getIncomingValue(i) != IdenticalPN.getIncomingValue(i)) {
-          diffVals++;
-          diffValIdx = i;
+        BasicBlock *PredBB = PN.getIncomingBlock(i);
+        if (PN.getIncomingValueForBlock(PredBB) !=
+            IdenticalPN.getIncomingValueForBlock(PredBB)) {
+          DiffVals++;
+          DiffValBB = PredBB;
         }
       }
       BasicBlock *CurBB = PN.getParent();
-      if (diffVals == 2 || PN.getIncomingBlock(diffValIdx) != CurBB)
+      if (DiffVals == 2 || DiffValBB != CurBB)
         continue;
       // Now check that the backedge incoming values are two select
-      // instructions that are in the same BB, and have the same condition,
-      // true value.
-      auto *Val = PN.getIncomingValue(diffValIdx);
-      auto *IdenticalVal = IdenticalPN.getIncomingValue(diffValIdx);
+      // instructions that are in the same BB, and have the same condition.
+      // Either their true values are the same, or their false values are
+      // the same.
+      auto *Val = PN.getIncomingValueForBlock(DiffValBB);
+      auto *IdenticalVal = IdenticalPN.getIncomingValueForBlock(DiffValBB);
       if (!isa<SelectInst>(Val) || !isa<SelectInst>(IdenticalVal))
         continue;
 
       auto *SI = cast<SelectInst>(Val);
       auto *IdenticalSI = cast<SelectInst>(IdenticalVal);
-      if (SI->getParent() != CurBB || IdenticalSI->getParent() != CurBB)
+      if (SI->getParent() != CurBB || IdenticalSI->getParent() != CurBB ||
+          SI->getNumUses() != 1)
         continue;
       if (SI->getCondition() != IdenticalSI->getCondition() ||
-          SI->getTrueValue() != IdenticalSI->getTrueValue())
+          (SI->getTrueValue() != IdenticalSI->getTrueValue() &&
+           SI->getFalseValue() != IdenticalSI->getFalseValue()))
         continue;
+      Value *SIOtherVal = nullptr;
+      Value *IdenticalSIOtherVal = nullptr;
+      if (SI->getTrueValue() == IdenticalSI->getTrueValue()) {
+        SIOtherVal = SI->getFalseValue();
+        IdenticalSIOtherVal = IdenticalSI->getFalseValue();
+      } else {
+        SIOtherVal = SI->getTrueValue();
+        IdenticalSIOtherVal = IdenticalSI->getTrueValue();
+      }
 
-      // Now check that the false values, i.e., %1 and %identicalPhi,
+      // Now check that the other values in select, i.e., %1 and %identicalPhi,
       // are essentially the same value within the same BB.
       auto SameSelAndPhi = [&](SelectInst *SI, PHINode *IdenticalPN,
                                PHINode *PN) {
@@ -1691,15 +1708,13 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
         }
         return false;
       };
-      auto *FalseVal = SI->getFalseValue();
-      auto *IdenticalSIFalseVal =
-          dyn_cast<PHINode>(IdenticalSI->getFalseValue());
-      if (!isa<SelectInst>(FalseVal) || !IdenticalSIFalseVal ||
-          IdenticalSIFalseVal != &IdenticalPN)
+      if (!isa<SelectInst>(SIOtherVal) || !isa<PHINode>(IdenticalSIOtherVal))
+        continue;
+      if (cast<PHINode>(IdenticalSIOtherVal) != &IdenticalPN)
         continue;
-      auto *FalseValSI = cast<SelectInst>(FalseVal);
-      if (FalseValSI->getParent() != CurBB ||
-          !SameSelAndPhi(FalseValSI, &IdenticalPN, &PN))
+      auto *SIOtherValAsSel = cast<SelectInst>(SIOtherVal);
+      if (SIOtherValAsSel->getParent() != CurBB ||
+          !SameSelAndPhi(SIOtherValAsSel, &IdenticalPN, &PN))
         continue;
 
       ++NumPHICSEs;
diff --git a/llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll b/llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll
index 79cb59ea8bbbc..69efdc08afd25 100644
--- a/llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll
+++ b/llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll
@@ -59,3 +59,63 @@ exit:
   store float %vl.1.lcssa, ptr @A
   ret void
 }
+
+; %phi.to.remove acts the same as %v1, and can be eliminated with PHI CSE.
+; The difference from enhanced_phi_cse() is that the true and false values in
+; %phi.to.remove.next and %v1.1 are swapped.
+define void @enhanced_phi_cse_2(ptr %m, ptr %n, i32 %count) {
+; CHECK-LABEL: @enhanced_phi_cse_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
+; CHECK-NEXT:    [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT:    [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
+; CHECK-NEXT:    [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
+; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
+; CHECK-NEXT:    [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]]
+; CHECK-NEXT:    [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
+; CHECK-NEXT:    [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                    ; preds = %entry, %for.body
+  %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
+  %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+  %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+  %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+  %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
+  %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
+  %q.load = load float, ptr %q
+  %c.load = load float, ptr %c
+  %sub = fsub float %q.load, %c.load
+  %cmp1 = fcmp olt float %sub, %v0
+  %v0.1 = select i1 %cmp1, float %sub, float %v0
+  %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+  %cmp2 = fcmp ogt float  %sub, %same.as.v1
+  %v1.1 = select i1 %cmp2, float %v1, float %sub
+  %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub
+  %inc.i = add nuw nsw i32 %i, 1
+  %q.next = getelementptr inbounds i8, ptr %q, i64 4
+  %c.next = getelementptr inbounds i8, ptr %c, i64 4
+  %exitcond = icmp eq i32 %inc.i, %count
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+  store float %vl.1.lcssa, ptr @A
+  ret void
+}

>From 4cfc7bfc3144896a15fdb3c7cceef65a75b60375 Mon Sep 17 00:00:00 2001
From: Congzhe Cao <congzhe.cao at huawei.com>
Date: Fri, 17 Oct 2025 13:05:37 -0400
Subject: [PATCH 3/5] Address reviewer's comments and simplify the code.

---
 llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index ffb81d07eef0b..146a79c7dd8cc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1668,8 +1668,8 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
           DiffValBB = PredBB;
         }
       }
-      BasicBlock *CurBB = PN.getParent();
-      if (DiffVals == 2 || DiffValBB != CurBB)
+
+      if (DiffVals != 1)
         continue;
       // Now check that the backedge incoming values are two select
       // instructions that are in the same BB, and have the same condition.
@@ -1682,9 +1682,6 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
 
       auto *SI = cast<SelectInst>(Val);
       auto *IdenticalSI = cast<SelectInst>(IdenticalVal);
-      if (SI->getParent() != CurBB || IdenticalSI->getParent() != CurBB ||
-          SI->getNumUses() != 1)
-        continue;
       if (SI->getCondition() != IdenticalSI->getCondition() ||
           (SI->getTrueValue() != IdenticalSI->getTrueValue() &&
            SI->getFalseValue() != IdenticalSI->getFalseValue()))
@@ -1713,8 +1710,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
       if (cast<PHINode>(IdenticalSIOtherVal) != &IdenticalPN)
         continue;
       auto *SIOtherValAsSel = cast<SelectInst>(SIOtherVal);
-      if (SIOtherValAsSel->getParent() != CurBB ||
-          !SameSelAndPhi(SIOtherValAsSel, &IdenticalPN, &PN))
+      if (!SameSelAndPhi(SIOtherValAsSel, &IdenticalPN, &PN))
         continue;
 
       ++NumPHICSEs;

>From ee823bcac935cffd434e157e9083c8d1fd4f6140 Mon Sep 17 00:00:00 2001
From: Congzhe Cao <congzhe.cao at huawei.com>
Date: Mon, 20 Oct 2025 19:23:15 -0400
Subject: [PATCH 4/5] Re-implemented in InstSimplify.

---
 llvm/lib/Analysis/InstructionSimplify.cpp     |  92 ++++++-
 .../Transforms/InstCombine/InstCombinePHI.cpp | 100 +------
 .../InstCombine/enhanced-phi-cse.ll           | 121 ---------
 .../InstCombine/select_with_identical_phi.ll  | 243 ++++++++++++++++++
 4 files changed, 339 insertions(+), 217 deletions(-)
 delete mode 100644 llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll
 create mode 100644 llvm/test/Transforms/InstCombine/select_with_identical_phi.ll

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index e08ef60dbede3..7533fde284697 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4866,6 +4866,89 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F,
   return nullptr;
 }
 
+// Look for the following pattern and simplify %1 to %identicalPhi.
+// Here %phi, %1 and %phi.next perform the same functionality as
+// %identicalPhi and hence the select instruction %1 can be folded
+// into %identicalPhi.
+//
+// BB1:
+//   %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ]
+//   %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ]
+//   ...
+//   %identicalPhi.next = select %cmp, %val, %identicalPhi
+//                      (or select %cmp, %identicalPhi, %val)
+//   %1 = select %cmp2, %identicalPhi, %phi
+//   %phi.next = select %cmp, %val, %1
+//             (or select %cmp, %1, %val)
+//
+// Prove that %phi and %identicalPhi are the same by induction:
+//
+// Base case: Both %phi and %identicalPhi are equal on entry to the loop.
+// Inductive case:
+// Suppose %phi and %identicalPhi are equal at iteration i.
+// We look at their values at iteration i+1 which are %phi.next and
+// %identicalPhi.next. They would have become different only when %cmp is
+// false and the corresponding values %1 and %identicalPhi differ
+// (similar reason for the other "or" case in the bracket).
+//
+// The only condition when %1 and %identicalPh could differ is when %cmp2
+// is false and %1 is %phi, which contradicts our inductive hypothesis
+// that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are
+// always equal at iteration i+1.
+bool isSimplifierIdenticalPHI(PHINode &PN, PHINode &IdenticalPN) {
+  if (PN.getParent() != IdenticalPN.getParent())
+    return false;
+  if (PN.getNumIncomingValues() != 2)
+    return false;
+
+  // Check that only the backedge incoming value is different.
+  unsigned DiffVals = 0;
+  BasicBlock *DiffValBB = nullptr;
+  for (unsigned i = 0; i < 2; i++) {
+    BasicBlock *PredBB = PN.getIncomingBlock(i);
+    if (PN.getIncomingValueForBlock(PredBB) !=
+        IdenticalPN.getIncomingValueForBlock(PredBB)) {
+      DiffVals++;
+      DiffValBB = PredBB;
+    }
+  }
+  if (DiffVals != 1)
+    return false;
+  // Now check that the backedge incoming values are two select
+  // instructions with the same condition. Either their true
+  // values are the same, or their false values are the same.
+  auto *SI = dyn_cast<SelectInst>(PN.getIncomingValueForBlock(DiffValBB));
+  auto *IdenticalSI =
+      dyn_cast<SelectInst>(IdenticalPN.getIncomingValueForBlock(DiffValBB));
+  if (!SI || !IdenticalSI)
+    return false;
+  if (SI->getCondition() != IdenticalSI->getCondition() ||
+      (SI->getTrueValue() != IdenticalSI->getTrueValue() &&
+       SI->getFalseValue() != IdenticalSI->getFalseValue()))
+    return false;
+
+  SelectInst *SIOtherVal = nullptr;
+  Value *IdenticalSIOtherVal = nullptr;
+  if (SI->getTrueValue() == IdenticalSI->getTrueValue()) {
+    SIOtherVal = dyn_cast<SelectInst>(SI->getFalseValue());
+    IdenticalSIOtherVal = IdenticalSI->getFalseValue();
+  } else {
+    SIOtherVal = dyn_cast<SelectInst>(SI->getTrueValue());
+    IdenticalSIOtherVal = IdenticalSI->getTrueValue();
+  }
+
+  // Now check that the other values in select, i.e., %1 and %identicalPhi,
+  // are essentially the same value within the same BB.
+  if (!SIOtherVal || IdenticalSIOtherVal != &IdenticalPN)
+    return false;
+  if (!(SIOtherVal->getTrueValue() == &IdenticalPN &&
+        SIOtherVal->getFalseValue() == &PN) &&
+      !(SIOtherVal->getTrueValue() == &PN &&
+        SIOtherVal->getFalseValue() == &IdenticalPN))
+    return false;
+  return true;
+}
+
 /// Given operands for a SelectInst, see if we can fold the result.
 /// If not, this returns null.
 static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
@@ -5041,7 +5124,14 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
   std::optional<bool> Imp = isImpliedByDomCondition(Cond, Q.CxtI, Q.DL);
   if (Imp)
     return *Imp ? TrueVal : FalseVal;
-
+  // Look for same PHIs in the true and false values.
+  if (auto *TruePHI = dyn_cast<PHINode>(TrueVal))
+    if (auto *FalsePHI = dyn_cast<PHINode>(FalseVal)) {
+      if (isSimplifierIdenticalPHI(*TruePHI, *FalsePHI))
+        return FalseVal;
+      if (isSimplifierIdenticalPHI(*FalsePHI, *TruePHI))
+        return TrueVal;
+    }
   return nullptr;
 }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 146a79c7dd8cc..9815644f5f43d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1621,101 +1621,11 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
     // Note that even though we've just canonicalized this PHI, due to the
     // worklist visitation order, there are no guarantess that *every* PHI
     // has been canonicalized, so we can't just compare operands ranges.
-    if (PN.isIdenticalToWhenDefined(&IdenticalPN)) {
-      // Just use that PHI instead then.
-      ++NumPHICSEs;
-      return replaceInstUsesWith(PN, &IdenticalPN);
-    }
-
-    // Look for the following pattern and do PHI CSE to clean up the
-    // redundant %phi. Here %phi, %1 and %phi.next perform the same
-    // functionality as %identicalPhi and hence %phi can be eliminated.
-    //
-    // BB1:
-    //   %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ]
-    //   %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ]
-    //   ...
-    //   %identicalPhi.next = select %cmp, %val, %identicalPhi
-    //                      (or select %cmp, %identicalPhi, %val)
-    //   %1 = select %cmp2, %identicalPhi, %phi
-    //   %phi.next = select %cmp, %val, %1
-    //             (or select %cmp, %1, %val)
-    //
-    // Prove that %phi and %identicalPhi are the same by induction:
-    //
-    // Base case: Both %phi and %identicalPhi are equal on entry to the loop.
-    // Inductive case:
-    // Suppose %phi and %identicalPhi are equal at iteration i.
-    // We look at their values at iteration i+1 which are %phi.next and
-    // %identicalPhi.next. They would have become different only when %cmp is
-    // false and the corresponding values %1 and %identicalPhi differ
-    // (similar reason for the other "or" case in the bracket).
-    //
-    // The only condition when %1 and %identicalPh could differ is when %cmp2
-    // is false and %1 is %phi, which contradicts our inductive hypothesis
-    // that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are
-    // always equal at iteration i+1.
-
-    if (PN.getNumIncomingValues() == 2) {
-      unsigned DiffVals = 0;
-      BasicBlock *DiffValBB = nullptr;
-      // Check that only the backedge incoming value is different.
-      for (unsigned i = 0; i < 2; i++) {
-        BasicBlock *PredBB = PN.getIncomingBlock(i);
-        if (PN.getIncomingValueForBlock(PredBB) !=
-            IdenticalPN.getIncomingValueForBlock(PredBB)) {
-          DiffVals++;
-          DiffValBB = PredBB;
-        }
-      }
-
-      if (DiffVals != 1)
-        continue;
-      // Now check that the backedge incoming values are two select
-      // instructions that are in the same BB, and have the same condition.
-      // Either their true values are the same, or their false values are
-      // the same.
-      auto *Val = PN.getIncomingValueForBlock(DiffValBB);
-      auto *IdenticalVal = IdenticalPN.getIncomingValueForBlock(DiffValBB);
-      if (!isa<SelectInst>(Val) || !isa<SelectInst>(IdenticalVal))
-        continue;
-
-      auto *SI = cast<SelectInst>(Val);
-      auto *IdenticalSI = cast<SelectInst>(IdenticalVal);
-      if (SI->getCondition() != IdenticalSI->getCondition() ||
-          (SI->getTrueValue() != IdenticalSI->getTrueValue() &&
-           SI->getFalseValue() != IdenticalSI->getFalseValue()))
-        continue;
-      Value *SIOtherVal = nullptr;
-      Value *IdenticalSIOtherVal = nullptr;
-      if (SI->getTrueValue() == IdenticalSI->getTrueValue()) {
-        SIOtherVal = SI->getFalseValue();
-        IdenticalSIOtherVal = IdenticalSI->getFalseValue();
-      } else {
-        SIOtherVal = SI->getTrueValue();
-        IdenticalSIOtherVal = IdenticalSI->getTrueValue();
-      }
-
-      // Now check that the other values in select, i.e., %1 and %identicalPhi,
-      // are essentially the same value within the same BB.
-      auto SameSelAndPhi = [&](SelectInst *SI, PHINode *IdenticalPN,
-                               PHINode *PN) {
-        if (SI->getTrueValue() == IdenticalPN) {
-          return SI->getFalseValue() == PN;
-        }
-        return false;
-      };
-      if (!isa<SelectInst>(SIOtherVal) || !isa<PHINode>(IdenticalSIOtherVal))
-        continue;
-      if (cast<PHINode>(IdenticalSIOtherVal) != &IdenticalPN)
-        continue;
-      auto *SIOtherValAsSel = cast<SelectInst>(SIOtherVal);
-      if (!SameSelAndPhi(SIOtherValAsSel, &IdenticalPN, &PN))
-        continue;
-
-      ++NumPHICSEs;
-      return replaceInstUsesWith(PN, &IdenticalPN);
-    }
+    if (!PN.isIdenticalToWhenDefined(&IdenticalPN))
+      continue;
+    // Just use that PHI instead then.
+    ++NumPHICSEs;
+    return replaceInstUsesWith(PN, &IdenticalPN);
   }
 
   // If this is an integer PHI and we know that it has an illegal type, see if
diff --git a/llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll b/llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll
deleted file mode 100644
index 69efdc08afd25..0000000000000
--- a/llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll
+++ /dev/null
@@ -1,121 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -passes=instcombine | FileCheck %s
-@A = extern_weak global float, align 4
-
-; %phi.to.remove acts the same as %v1, and can be eliminated with PHI CSE.
-define void @enhanced_phi_cse(ptr %m, ptr %n, i32 %count) {
-; CHECK-LABEL: @enhanced_phi_cse(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
-; CHECK-NEXT:    [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
-; CHECK-NEXT:    [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
-; CHECK-NEXT:    [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
-; CHECK-NEXT:    [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]]
-; CHECK-NEXT:    [[INC_I]] = add nuw nsw i32 [[I]], 1
-; CHECK-NEXT:    [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
-; CHECK-NEXT:    [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
-; CHECK:       exit:
-; CHECK-NEXT:    store float [[V1_1]], ptr @A, align 4
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.body
-
-for.body:                                    ; preds = %entry, %for.body
-  %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
-  %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
-  %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
-  %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
-  %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
-  %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
-  %q.load = load float, ptr %q
-  %c.load = load float, ptr %c
-  %sub = fsub float %q.load, %c.load
-  %cmp1 = fcmp olt float %sub, %v0
-  %v0.1 = select i1 %cmp1, float %sub, float %v0
-  %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
-  %cmp2 = fcmp ogt float  %sub, %same.as.v1
-  %v1.1 = select i1 %cmp2, float %sub, float %v1
-  %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1
-  %inc.i = add nuw nsw i32 %i, 1
-  %q.next = getelementptr inbounds i8, ptr %q, i64 4
-  %c.next = getelementptr inbounds i8, ptr %c, i64 4
-  %exitcond = icmp eq i32 %inc.i, %count
-  br i1 %exitcond, label %exit, label %for.body
-
-exit:
-  %vl.1.lcssa = phi float [ %v1.1, %for.body ]
-  store float %vl.1.lcssa, ptr @A
-  ret void
-}
-
-; %phi.to.remove acts the same as %v1, and can be eliminated with PHI CSE.
-; The difference from enhanced_phi_cse() is that the true and false values in
-; %phi.to.remove.next and %v1.1 are swapped.
-define void @enhanced_phi_cse_2(ptr %m, ptr %n, i32 %count) {
-; CHECK-LABEL: @enhanced_phi_cse_2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
-; CHECK-NEXT:    [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
-; CHECK-NEXT:    [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
-; CHECK-NEXT:    [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
-; CHECK-NEXT:    [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]]
-; CHECK-NEXT:    [[INC_I]] = add nuw nsw i32 [[I]], 1
-; CHECK-NEXT:    [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
-; CHECK-NEXT:    [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
-; CHECK:       exit:
-; CHECK-NEXT:    store float [[V1_1]], ptr @A, align 4
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.body
-
-for.body:                                    ; preds = %entry, %for.body
-  %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
-  %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
-  %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
-  %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
-  %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
-  %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
-  %q.load = load float, ptr %q
-  %c.load = load float, ptr %c
-  %sub = fsub float %q.load, %c.load
-  %cmp1 = fcmp olt float %sub, %v0
-  %v0.1 = select i1 %cmp1, float %sub, float %v0
-  %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
-  %cmp2 = fcmp ogt float  %sub, %same.as.v1
-  %v1.1 = select i1 %cmp2, float %v1, float %sub
-  %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub
-  %inc.i = add nuw nsw i32 %i, 1
-  %q.next = getelementptr inbounds i8, ptr %q, i64 4
-  %c.next = getelementptr inbounds i8, ptr %c, i64 4
-  %exitcond = icmp eq i32 %inc.i, %count
-  br i1 %exitcond, label %exit, label %for.body
-
-exit:
-  %vl.1.lcssa = phi float [ %v1.1, %for.body ]
-  store float %vl.1.lcssa, ptr @A
-  ret void
-}
diff --git a/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll
new file mode 100644
index 0000000000000..7816781250799
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll
@@ -0,0 +1,243 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes=instcombine | FileCheck %s
+@A = extern_weak global float, align 4
+
+; %same.as.v1 is a select with two phis %v1 and %phi.to.remove as the true
+; and false values, while %v1 and %phi.to.remove are actually the same.
+; Fold the select instruction %same.as.v1 to %v1.
+define void @select_with_identical_phi(ptr %m, ptr %n, i32 %count) {
+; CHECK-LABEL: @select_with_identical_phi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
+; CHECK-NEXT:    [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT:    [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
+; CHECK-NEXT:    [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
+; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
+; CHECK-NEXT:    [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]]
+; CHECK-NEXT:    [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
+; CHECK-NEXT:    [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                    ; preds = %entry, %for.body
+  %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
+  %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+  %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+  %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+  %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
+  %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
+  %q.load = load float, ptr %q
+  %c.load = load float, ptr %c
+  %sub = fsub float %q.load, %c.load
+  %cmp1 = fcmp olt float %sub, %v0
+  %v0.1 = select i1 %cmp1, float %sub, float %v0
+  %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+  %cmp2 = fcmp ogt float  %sub, %same.as.v1
+  %v1.1 = select i1 %cmp2, float %sub, float %v1
+  %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1
+  %inc.i = add nuw nsw i32 %i, 1
+  %q.next = getelementptr inbounds i8, ptr %q, i64 4
+  %c.next = getelementptr inbounds i8, ptr %c, i64 4
+  %exitcond = icmp eq i32 %inc.i, %count
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+  store float %vl.1.lcssa, ptr @A
+  ret void
+}
+
+; The difference from select_with_identical_phi() is that the true and false values in
+; %phi.to.remove.next and %v1.1 are swapped.
+; Check that %same.as.v1 can be folded.
+define void @select_with_identical_phi_2(ptr %m, ptr %n, i32 %count) {
+; CHECK-LABEL: @select_with_identical_phi_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
+; CHECK-NEXT:    [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT:    [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
+; CHECK-NEXT:    [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
+; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
+; CHECK-NEXT:    [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]]
+; CHECK-NEXT:    [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
+; CHECK-NEXT:    [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                    ; preds = %entry, %for.body
+  %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
+  %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+  %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+  %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+  %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
+  %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
+  %q.load = load float, ptr %q
+  %c.load = load float, ptr %c
+  %sub = fsub float %q.load, %c.load
+  %cmp1 = fcmp olt float %sub, %v0
+  %v0.1 = select i1 %cmp1, float %sub, float %v0
+  %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+  %cmp2 = fcmp ogt float  %sub, %same.as.v1
+  %v1.1 = select i1 %cmp2, float %v1, float %sub
+  %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub
+  %inc.i = add nuw nsw i32 %i, 1
+  %q.next = getelementptr inbounds i8, ptr %q, i64 4
+  %c.next = getelementptr inbounds i8, ptr %c, i64 4
+  %exitcond = icmp eq i32 %inc.i, %count
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+  store float %vl.1.lcssa, ptr @A
+  ret void
+}
+
+; The difference from select_with_identical_phi() is that the true and false values in
+; %same.as.v1 are swapped.
+; Check that %same.as.v1 can be folded.
+define void @select_with_identical_phi_3(ptr %m, ptr %n, i32 %count) {
+; CHECK-LABEL: @select_with_identical_phi_3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
+; CHECK-NEXT:    [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT:    [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
+; CHECK-NEXT:    [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
+; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
+; CHECK-NEXT:    [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]]
+; CHECK-NEXT:    [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
+; CHECK-NEXT:    [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                    ; preds = %entry, %for.body
+  %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
+  %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+  %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+  %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+  %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
+  %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
+  %q.load = load float, ptr %q
+  %c.load = load float, ptr %c
+  %sub = fsub float %q.load, %c.load
+  %cmp1 = fcmp olt float %sub, %v0
+  %v0.1 = select i1 %cmp1, float %sub, float %v0
+  %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1
+  %cmp2 = fcmp ogt float  %sub, %same.as.v1
+  %v1.1 = select i1 %cmp2, float %sub, float %v1
+  %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1
+  %inc.i = add nuw nsw i32 %i, 1
+  %q.next = getelementptr inbounds i8, ptr %q, i64 4
+  %c.next = getelementptr inbounds i8, ptr %c, i64 4
+  %exitcond = icmp eq i32 %inc.i, %count
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+  store float %vl.1.lcssa, ptr @A
+  ret void
+}
+
+; The difference from select_with_identical_phi() is that the true and false values in
+; %same.as.v1, %phi.to.remove.next and %v1.1 are swapped.
+; Check that %same.as.v1 can be folded.
+define void @select_with_identical_phi_4(ptr %m, ptr %n, i32 %count) {
+; CHECK-LABEL: @select_with_identical_phi_4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
+; CHECK-NEXT:    [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT:    [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
+; CHECK-NEXT:    [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
+; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
+; CHECK-NEXT:    [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]]
+; CHECK-NEXT:    [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
+; CHECK-NEXT:    [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                    ; preds = %entry, %for.body
+  %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
+  %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+  %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+  %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+  %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
+  %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
+  %q.load = load float, ptr %q
+  %c.load = load float, ptr %c
+  %sub = fsub float %q.load, %c.load
+  %cmp1 = fcmp olt float %sub, %v0
+  %v0.1 = select i1 %cmp1, float %sub, float %v0
+  %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1
+  %cmp2 = fcmp ogt float  %sub, %same.as.v1
+  %v1.1 = select i1 %cmp2, float %v1, float %sub
+  %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub
+  %inc.i = add nuw nsw i32 %i, 1
+  %q.next = getelementptr inbounds i8, ptr %q, i64 4
+  %c.next = getelementptr inbounds i8, ptr %c, i64 4
+  %exitcond = icmp eq i32 %inc.i, %count
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+  store float %vl.1.lcssa, ptr @A
+  ret void
+}

>From 60777d5129a2543e9b109b98a2e4bfc87326155c Mon Sep 17 00:00:00 2001
From: Congzhe Cao <congzhe.cao at huawei.com>
Date: Tue, 21 Oct 2025 12:30:12 -0400
Subject: [PATCH 5/5] Address reviewer's comments made on Oct 21st.

---
 llvm/lib/Analysis/InstructionSimplify.cpp | 66 +++++++++++------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 7533fde284697..528fd3f479fe0 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4866,35 +4866,35 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F,
   return nullptr;
 }
 
-// Look for the following pattern and simplify %1 to %identicalPhi.
-// Here %phi, %1 and %phi.next perform the same functionality as
-// %identicalPhi and hence the select instruction %1 can be folded
-// into %identicalPhi.
-//
-// BB1:
-//   %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ]
-//   %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ]
-//   ...
-//   %identicalPhi.next = select %cmp, %val, %identicalPhi
-//                      (or select %cmp, %identicalPhi, %val)
-//   %1 = select %cmp2, %identicalPhi, %phi
-//   %phi.next = select %cmp, %val, %1
-//             (or select %cmp, %1, %val)
-//
-// Prove that %phi and %identicalPhi are the same by induction:
-//
-// Base case: Both %phi and %identicalPhi are equal on entry to the loop.
-// Inductive case:
-// Suppose %phi and %identicalPhi are equal at iteration i.
-// We look at their values at iteration i+1 which are %phi.next and
-// %identicalPhi.next. They would have become different only when %cmp is
-// false and the corresponding values %1 and %identicalPhi differ
-// (similar reason for the other "or" case in the bracket).
-//
-// The only condition when %1 and %identicalPh could differ is when %cmp2
-// is false and %1 is %phi, which contradicts our inductive hypothesis
-// that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are
-// always equal at iteration i+1.
+/// Look for the following pattern and simplify %1 to %identicalPhi.
+/// Here %phi, %1 and %phi.next perform the same functionality as
+/// %identicalPhi and hence the select instruction %1 can be folded
+/// into %identicalPhi.
+///
+/// BB1:
+///   %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ]
+///   %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ]
+///   ...
+///   %identicalPhi.next = select %cmp, %val, %identicalPhi
+///                      (or select %cmp, %identicalPhi, %val)
+///   %1 = select %cmp2, %identicalPhi, %phi
+///   %phi.next = select %cmp, %val, %1
+///             (or select %cmp, %1, %val)
+///
+/// Prove that %phi and %identicalPhi are the same by induction:
+///
+/// Base case: Both %phi and %identicalPhi are equal on entry to the loop.
+/// Inductive case:
+/// Suppose %phi and %identicalPhi are equal at iteration i.
+/// We look at their values at iteration i+1 which are %phi.next and
+/// %identicalPhi.next. They would have become different only when %cmp is
+/// false and the corresponding values %1 and %identicalPhi differ
+/// (similar reason for the other "or" case in the bracket).
+///
+/// The only condition when %1 and %identicalPhi could differ is when %cmp2
+/// is false and %1 is %phi, which contradicts our inductive hypothesis
+/// that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are
+/// always equal at iteration i+1.
 bool isSimplifierIdenticalPHI(PHINode &PN, PHINode &IdenticalPN) {
   if (PN.getParent() != IdenticalPN.getParent())
     return false;
@@ -4922,9 +4922,7 @@ bool isSimplifierIdenticalPHI(PHINode &PN, PHINode &IdenticalPN) {
       dyn_cast<SelectInst>(IdenticalPN.getIncomingValueForBlock(DiffValBB));
   if (!SI || !IdenticalSI)
     return false;
-  if (SI->getCondition() != IdenticalSI->getCondition() ||
-      (SI->getTrueValue() != IdenticalSI->getTrueValue() &&
-       SI->getFalseValue() != IdenticalSI->getFalseValue()))
+  if (SI->getCondition() != IdenticalSI->getCondition())
     return false;
 
   SelectInst *SIOtherVal = nullptr;
@@ -4932,9 +4930,11 @@ bool isSimplifierIdenticalPHI(PHINode &PN, PHINode &IdenticalPN) {
   if (SI->getTrueValue() == IdenticalSI->getTrueValue()) {
     SIOtherVal = dyn_cast<SelectInst>(SI->getFalseValue());
     IdenticalSIOtherVal = IdenticalSI->getFalseValue();
-  } else {
+  } else if (SI->getFalseValue() == IdenticalSI->getFalseValue()) {
     SIOtherVal = dyn_cast<SelectInst>(SI->getTrueValue());
     IdenticalSIOtherVal = IdenticalSI->getTrueValue();
+  } else {
+    return false;
   }
 
   // Now check that the other values in select, i.e., %1 and %identicalPhi,