[llvm] 21e3a21 - [InstCombine] Replace an integer comparison of a `phi` node with multiple `ucmp`/`scmp` operands and a constant with `phi` of individual comparisons of original intrinsic's arguments (#107769)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 13 11:50:31 PDT 2024


Author: Volodymyr Vasylkun
Date: 2024-09-13T19:50:27+01:00
New Revision: 21e3a212c570dc80055742ef8abbd0a306ff9135

URL: https://github.com/llvm/llvm-project/commit/21e3a212c570dc80055742ef8abbd0a306ff9135
DIFF: https://github.com/llvm/llvm-project/commit/21e3a212c570dc80055742ef8abbd0a306ff9135.diff

LOG: [InstCombine] Replace an integer comparison of a `phi` node with multiple `ucmp`/`scmp` operands and a constant with `phi` of individual comparisons of original intrinsic's arguments (#107769)

When we have a `phi` instruction with more than one of its incoming
values being a call to `ucmp` or `scmp`, which is then compared with an
integer constant, we can move the comparison through the `phi` into the
incoming basic blocks because we know that a comparison of `ucmp`/`scmp`
with a constant will be simplified by the next iteration of InstCombine.

There's a high chance that other similar patterns can be identified, in
which case they can be easily handled by the same code by moving the
check for "simplifiable" instructions into a lambda.

Added: 
    llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll

Modified: 
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 2bdedfed47bdca..aa3f3fbdaeffa0 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1809,8 +1809,8 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
   // Check to see whether the instruction can be folded into each phi operand.
   // If there is one operand that does not fold, remember the BB it is in.
   SmallVector<Value *> NewPhiValues;
-  BasicBlock *NonSimplifiedBB = nullptr;
-  Value *NonSimplifiedInVal = nullptr;
+  SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
+  bool SeenNonSimplifiedInVal = false;
   for (unsigned i = 0; i != NumPHIValues; ++i) {
     Value *InVal = PN->getIncomingValue(i);
     BasicBlock *InBB = PN->getIncomingBlock(i);
@@ -1820,35 +1820,64 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
       continue;
     }
 
-    if (NonSimplifiedBB) return nullptr;  // More than one non-simplified value.
+    // If the only use of phi is comparing it with a constant then we can
+    // put this comparison in the incoming BB directly after a ucmp/scmp call
+    // because we know that it will simplify to a single icmp.
+    // NOTE: the single-use check here is not only to ensure that the
+    // optimization is profitable, but also to avoid creating a potentially
+    // invalid phi node when we have a multi-edge in the CFG.
+    const APInt *Ignored;
+    if (isa<CmpIntrinsic>(InVal) && InVal->hasOneUse() &&
+        match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored)))) {
+      OpsToMoveUseToIncomingBB.push_back(i);
+      NewPhiValues.push_back(nullptr);
+      continue;
+    }
+
+    if (SeenNonSimplifiedInVal)
+      return nullptr; // More than one non-simplified value.
+    SeenNonSimplifiedInVal = true;
+
+    // If there is exactly one non-simplified value, we can insert a copy of the
+    // operation in that block.  However, if this is a critical edge, we would
+    // be inserting the computation on some other paths (e.g. inside a loop).
+    // Only do this if the pred block is unconditionally branching into the phi
+    // block. Also, make sure that the pred block is not dead code.
+    BranchInst *BI = dyn_cast<BranchInst>(InBB->getTerminator());
+    if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(InBB))
+      return nullptr;
 
-    NonSimplifiedBB = InBB;
-    NonSimplifiedInVal = InVal;
     NewPhiValues.push_back(nullptr);
+    OpsToMoveUseToIncomingBB.push_back(i);
 
     // If the InVal is an invoke at the end of the pred block, then we can't
     // insert a computation after it without breaking the edge.
     if (isa<InvokeInst>(InVal))
-      if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
+      if (cast<Instruction>(InVal)->getParent() == InBB)
         return nullptr;
 
     // Do not push the operation across a loop backedge. This could result in
     // an infinite combine loop, and is generally non-profitable (especially
     // if the operation was originally outside the loop).
-    if (isBackEdge(NonSimplifiedBB, PN->getParent()))
+    if (isBackEdge(InBB, PN->getParent()))
       return nullptr;
   }
 
-  // If there is exactly one non-simplified value, we can insert a copy of the
-  // operation in that block.  However, if this is a critical edge, we would be
-  // inserting the computation on some other paths (e.g. inside a loop).  Only
-  // do this if the pred block is unconditionally branching into the phi block.
-  // Also, make sure that the pred block is not dead code.
-  if (NonSimplifiedBB != nullptr) {
-    BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
-    if (!BI || !BI->isUnconditional() ||
-        !DT.isReachableFromEntry(NonSimplifiedBB))
-      return nullptr;
+  // Clone the instruction that uses the phi node and move it into the incoming
+  // BB because we know that the next iteration of InstCombine will simplify it.
+  for (auto OpIndex : OpsToMoveUseToIncomingBB) {
+    Value *Op = PN->getIncomingValue(OpIndex);
+    BasicBlock *OpBB = PN->getIncomingBlock(OpIndex);
+
+    Instruction *Clone = I.clone();
+    for (Use &U : Clone->operands()) {
+      if (U == PN)
+        U = Op;
+      else
+        U = U->DoPHITranslation(PN->getParent(), OpBB);
+    }
+    Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator());
+    NewPhiValues[OpIndex] = Clone;
   }
 
   // Okay, we can do the transformation: create the new PHI node.
@@ -1857,30 +1886,13 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
   NewPN->takeName(PN);
   NewPN->setDebugLoc(PN->getDebugLoc());
 
-  // If we are going to have to insert a new computation, do so right before the
-  // predecessor's terminator.
-  Instruction *Clone = nullptr;
-  if (NonSimplifiedBB) {
-    Clone = I.clone();
-    for (Use &U : Clone->operands()) {
-      if (U == PN)
-        U = NonSimplifiedInVal;
-      else
-        U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
-    }
-    InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
-  }
-
-  for (unsigned i = 0; i != NumPHIValues; ++i) {
-    if (NewPhiValues[i])
-      NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
-    else
-      NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
-  }
+  for (unsigned i = 0; i != NumPHIValues; ++i)
+    NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
 
   for (User *U : make_early_inc_range(PN->users())) {
     Instruction *User = cast<Instruction>(U);
-    if (User == &I) continue;
+    if (User == &I)
+      continue;
     replaceInstUsesWith(*User, NewPN);
     eraseInstFromFunction(*User);
   }

diff  --git a/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll b/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll
new file mode 100644
index 00000000000000..2b75d5c5475117
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare void @use(i8 %value);
+
+; Since we know that any comparison of ucmp/scmp with a constant will result in
+; a comparison of ucmp/scmp's operands, we can propagate such a comparison
+; through the phi node and let the next iteration of instcombine simplify it.
+define i1 @icmp_of_phi_of_scmp_with_constant(i1 %c, i16 %x, i16 %y)
+; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant(
+; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; CHECK:       [[TRUE]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]]
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[FALSE]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[Y]], [[X]]
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[R:%.*]] = phi i1 [ [[TMP0]], %[[TRUE]] ], [ [[TMP1]], %[[FALSE]] ]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+{
+entry:
+  br i1 %c, label %true, label %false
+true:
+  %cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
+  br label %exit
+false:
+  %cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
+  br label %exit
+exit:
+  %phi = phi i8 [%cmp1, %true], [%cmp2, %false]
+  %r = icmp slt i8 %phi, 0
+  ret i1 %r
+}
+
+; When one of the incoming values is ucmp/scmp and the other is not we can still perform the transformation
+define i1 @icmp_of_phi_of_one_scmp_with_constant(i1 %c, i16 %x, i16 %y, i8 %false_val)
+; CHECK-LABEL: define i1 @icmp_of_phi_of_one_scmp_with_constant(
+; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]], i8 [[FALSE_VAL:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; CHECK:       [[TRUE]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]]
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[FALSE]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[FALSE_VAL]], 0
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i1 [ [[TMP0]], %[[TRUE]] ], [ [[TMP1]], %[[FALSE]] ]
+; CHECK-NEXT:    ret i1 [[PHI]]
+;
+{
+entry:
+  br i1 %c, label %true, label %false
+true:
+  %cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
+  br label %exit
+false:
+  br label %exit
+exit:
+  %phi = phi i8 [%cmp1, %true], [%false_val, %false]
+  %r = icmp slt i8 %phi, 0
+  ret i1 %r
+}
+
+; Negative test: the RHS of comparison that uses the phi node is not constant
+define i1 @icmp_of_phi_of_scmp_with_non_constant(i1 %c, i16 %x, i16 %y, i8 %cmp)
+; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_non_constant(
+; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]], i8 [[CMP:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; CHECK:       [[TRUE]]:
+; CHECK-NEXT:    [[CMP1:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[X]], i16 [[Y]])
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[FALSE]]:
+; CHECK-NEXT:    [[CMP2:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[Y]], i16 [[X]])
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i8 [ [[CMP1]], %[[TRUE]] ], [ [[CMP2]], %[[FALSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i8 [[PHI]], [[CMP]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+{
+entry:
+  br i1 %c, label %true, label %false
+true:
+  %cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
+  br label %exit
+false:
+  %cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
+  br label %exit
+exit:
+  %phi = phi i8 [%cmp1, %true], [%cmp2, %false]
+  %r = icmp slt i8 %phi, %cmp
+  ret i1 %r
+}
+
+; Negative test: more than one incoming value of the phi node is not one-use
+define i1 @icmp_of_phi_of_scmp_with_constant_not_one_use(i1 %c, i16 %x, i16 %y)
+; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant_not_one_use(
+; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; CHECK:       [[TRUE]]:
+; CHECK-NEXT:    [[CMP1:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[X]], i16 [[Y]])
+; CHECK-NEXT:    call void @use(i8 [[CMP1]])
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[FALSE]]:
+; CHECK-NEXT:    [[CMP2:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[Y]], i16 [[X]])
+; CHECK-NEXT:    call void @use(i8 [[CMP2]])
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i8 [ [[CMP1]], %[[TRUE]] ], [ [[CMP2]], %[[FALSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = icmp slt i8 [[PHI]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+{
+entry:
+  br i1 %c, label %true, label %false
+true:
+  %cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
+  call void @use(i8 %cmp1)
+  br label %exit
+false:
+  %cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
+  call void @use(i8 %cmp2)
+  br label %exit
+exit:
+  %phi = phi i8 [%cmp1, %true], [%cmp2, %false]
+  %r = icmp slt i8 %phi, 0
+  ret i1 %r
+}


        


More information about the llvm-commits mailing list