[llvm] r312514 - [PowerPC] eliminate redundant compare instruction

Hiroshi Inoue via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 4 21:15:17 PDT 2017


Author: inouehrs
Date: Mon Sep  4 21:15:17 2017
New Revision: 312514

URL: http://llvm.org/viewvc/llvm-project?rev=312514&view=rev
Log:
[PowerPC] eliminate redundant compare instruction

If multiple conditional branches are executed based on the same comparison, we can execute multiple conditional branches based on the result of one comparison on PPC. For example,

if (a == 0) { ... }
else if (a < 0) { ... }

can be executed by one compare and two conditional branches instead of two pairs of a compare and a conditional branch.

This patch identifies a code sequence of the two pairs of a compare and a conditional branch and merge the compares if possible.
To maximize the opportunity, we do canonicalization of code sequence before merging compares.
For the above example, the input for this pass looks like:

cmplwi r3, 0
beq    0, .LBB0_3
cmpwi  r3, -1
bgt    0, .LBB0_4

So, before merging two compares, we canonicalize it as

cmpwi  r3, 0       ; cmplwi and cmpwi yield same result for beq
beq    0, .LBB0_3
cmpwi  r3, 0       ; greather than -1 means greater or equal to 0
bge    0, .LBB0_4

The generated code should be

cmpwi  r3, 0
beq    0, .LBB0_3
bge    0, .LBB0_4

Differential Revision: https://reviews.llvm.org/D37211


Added:
    llvm/trunk/test/CodeGen/PowerPC/cmp_elimination.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp

Modified: llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp?rev=312514&r1=312513&r2=312514&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp Mon Sep  4 21:15:17 2017
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
+#include "MCTargetDesc/PPCPredicates.h"
 
 using namespace llvm;
 
@@ -56,6 +57,9 @@ private:
   // Perform peepholes.
   bool simplifyCode(void);
 
+  // Perform peepholes.
+  bool eliminateRedundantCompare(void);
+
   // Find the "true" register represented by SrcReg (following chains
   // of copies and subreg_to_reg operations).
   unsigned lookThruCopyLike(unsigned SrcReg);
@@ -346,6 +350,301 @@ bool PPCMIPeephole::simplifyCode(void) {
     }
   }
 
+  // We try to eliminate redundant compare instruction.
+  Simplified |= eliminateRedundantCompare();
+
+  return Simplified;
+}
+
+// helper functions for eliminateRedundantCompare
+static bool isEqOrNe(MachineInstr *BI) {
+  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
+  unsigned PredCond = PPC::getPredicateCondition(Pred);
+  return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
+}
+
+static bool isSupportedCmpOp(unsigned opCode) {
+  return (opCode == PPC::CMPLD  || opCode == PPC::CMPD  ||
+          opCode == PPC::CMPLW  || opCode == PPC::CMPW  ||
+          opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
+          opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
+}
+
+static bool is64bitCmpOp(unsigned opCode) {
+  return (opCode == PPC::CMPLD  || opCode == PPC::CMPD ||
+          opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
+}
+
+static bool isSignedCmpOp(unsigned opCode) {
+  return (opCode == PPC::CMPD  || opCode == PPC::CMPW ||
+          opCode == PPC::CMPDI || opCode == PPC::CMPWI);
+}
+
+static unsigned getSignedCmpOpCode(unsigned opCode) {
+  if (opCode == PPC::CMPLD)  return PPC::CMPD;
+  if (opCode == PPC::CMPLW)  return PPC::CMPW;
+  if (opCode == PPC::CMPLDI) return PPC::CMPDI;
+  if (opCode == PPC::CMPLWI) return PPC::CMPWI;
+  return opCode;
+}
+
+// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
+// (LT x) to (LE x-1)
+static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
+  uint64_t Imm = CMPI->getOperand(2).getImm();
+  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
+  if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
+    return 0;
+
+  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
+  unsigned PredCond = PPC::getPredicateCondition(Pred);
+  unsigned PredHint = PPC::getPredicateHint(Pred);
+  if (PredCond == PPC::PRED_GE)
+    return PPC::getPredicate(PPC::PRED_GT, PredHint);
+  if (PredCond == PPC::PRED_LT)
+    return PPC::getPredicate(PPC::PRED_LE, PredHint);
+
+  return 0;
+}
+
+// We can increment immediate x in (GT x) by changing it to (GE x+1) or
+// (LE x) to (LT x+1)
+static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
+  uint64_t Imm = CMPI->getOperand(2).getImm();
+  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
+  if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
+    return 0;
+
+  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
+  unsigned PredCond = PPC::getPredicateCondition(Pred);
+  unsigned PredHint = PPC::getPredicateHint(Pred);
+  if (PredCond == PPC::PRED_GT)
+    return PPC::getPredicate(PPC::PRED_GE, PredHint);
+  if (PredCond == PPC::PRED_LE)
+    return PPC::getPredicate(PPC::PRED_LT, PredHint);
+
+  return 0;
+}
+
+static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
+                                          MachineRegisterInfo *MRI) {
+
+  auto isEligibleBB = [&](MachineBasicBlock &BB) {
+    auto BII = BB.getFirstInstrTerminator();
+    // We optimize BBs ending with a conditional branch.
+    // We check only for BCC here, not BCCLR, because BCCLR
+    // will be formed only later in the pipeline. 
+    if (BB.succ_size() == 2 &&
+        BII != BB.instr_end() &&
+        (*BII).getOpcode() == PPC::BCC &&
+        (*BII).getOperand(1).isReg()) {
+      // We optimize only if the condition code is used only by one BCC.
+      unsigned CndReg = (*BII).getOperand(1).getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
+          !MRI->hasOneNonDBGUse(CndReg))
+        return false;
+
+      // We skip this BB if a physical register is used in comparison.
+      MachineInstr *CMPI = MRI->getVRegDef(CndReg);
+      for (MachineOperand &MO : CMPI->operands())
+        if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+          return false;
+
+      return true;
+    }
+    return false;
+  };
+
+  if (MBB.pred_size() != 1)
+    return false;
+
+  MachineBasicBlock *PredMBB = *MBB.pred_begin();
+  if (isEligibleBB(MBB) && isEligibleBB(*PredMBB))
+    return true;
+
+  return false;
+}
+
+// If multiple conditional branches are executed based on the (essentially)
+// same comparison, we merge compare instructions into one and make multiple
+// conditional branches on this comparison.
+// For example,
+//   if (a == 0) { ... }
+//   else if (a < 0) { ... }
+// can be executed by one compare and two conditional branches instead of
+// two pairs of a compare and a conditional branch.
+//
+// This method merges two compare instructions in two MBBs and modifies the
+// compare and conditional branch instructions if needed.
+// For the above example, the input for this pass looks like:
+//   cmplwi r3, 0
+//   beq    0, .LBB0_3
+//   cmpwi  r3, -1
+//   bgt    0, .LBB0_4
+// So, before merging two compares, we need to modify these instructions as
+//   cmpwi  r3, 0       ; cmplwi and cmpwi yield same result for beq
+//   beq    0, .LBB0_3
+//   cmpwi  r3, 0       ; greather than -1 means greater or equal to 0
+//   bge    0, .LBB0_4
+
+bool PPCMIPeephole::eliminateRedundantCompare(void) {
+  bool Simplified = false;
+
+  for (MachineBasicBlock &MBB2 : *MF) {
+    // We only consider two basic blocks MBB1 and MBB2 if
+    // - both MBBs end with a conditional branch,
+    // - MBB1 is the only predecessor of MBB2, and
+    // - compare does not take a physical register as a operand in both MBBs.
+    if (!eligibleForCompareElimination(MBB2, MRI))
+      continue;
+
+    MachineBasicBlock *MBB1 = *MBB2.pred_begin();
+    MachineInstr *BI1   = &*MBB1->getFirstInstrTerminator();
+    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
+
+    MachineInstr *BI2   = &*MBB2.getFirstInstrTerminator();
+    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
+
+    // We cannot optimize an unsupported compare opcode or
+    // a mix of 32-bit and 64-bit comaprisons
+    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
+        !isSupportedCmpOp(CMPI2->getOpcode()) ||
+        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
+      continue;
+
+    unsigned NewOpCode = 0;
+    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
+    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
+
+    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
+      // Typically, unsigned comparison is used for equality check, but
+      // we replace it with a signed comparison if the comparison
+      // to be merged is a signed comparison.
+      // In other cases of opcode mismatch, we cannot optimize this.
+      if (isEqOrNe(BI2) &&
+          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
+        NewOpCode = CMPI1->getOpcode();
+      else if (isEqOrNe(BI1) &&
+               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
+        NewOpCode = CMPI2->getOpcode();
+      else continue;
+    }
+
+    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
+      // In case of comparisons between two registers, these two registers
+      // must be same to merge two comparisons.
+      unsigned Cmp1Operand1 = CMPI1->getOperand(1).getReg();
+      unsigned Cmp1Operand2 = CMPI1->getOperand(2).getReg();
+      unsigned Cmp2Operand1 = CMPI2->getOperand(1).getReg();
+      unsigned Cmp2Operand2 = CMPI2->getOperand(2).getReg();
+      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
+        // Same pair of registers in the same order; ready to merge as is.
+      }
+      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
+        // Same pair of registers in different order.
+        // We reverse the predicate to merge compare instructions.
+        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
+        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
+      }
+      else continue;
+    }
+    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()){
+      // In case of comparisons between a register and an immediate,
+      // the operand register must be same for two compare instructions.
+      if (CMPI1->getOperand(1).getReg() != CMPI2->getOperand(1).getReg())
+        continue;
+
+      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
+      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
+
+      // If immediate are not same, we try to adjust by changing predicate;
+      // e.g. GT imm means GE (imm+1).
+      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
+        int Diff = Imm1 - Imm2;
+        if (Diff < -2 || Diff > 2)
+          continue;
+
+        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
+        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
+        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
+        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
+        if (Diff == 2) {
+          if (PredToInc2 && PredToDec1) {
+            NewPredicate2 = PredToInc2;
+            NewPredicate1 = PredToDec1;
+            NewImm2++;
+            NewImm1--;
+          }
+        }
+        else if (Diff == 1) {
+          if (PredToInc2) {
+            NewImm2++;
+            NewPredicate2 = PredToInc2;
+          }
+          else if (PredToDec1) {
+            NewImm1--;
+            NewPredicate1 = PredToDec1;
+          }
+        }
+        else if (Diff == -1) {
+          if (PredToDec2) {
+            NewImm2--;
+            NewPredicate2 = PredToDec2;
+          }
+          else if (PredToInc1) {
+            NewImm1++;
+            NewPredicate1 = PredToInc1;
+          }
+        }
+        else if (Diff == -2) {
+          if (PredToDec2 && PredToInc1) {
+            NewPredicate2 = PredToDec2;
+            NewPredicate1 = PredToInc1;
+            NewImm2--;
+            NewImm1++;
+          }
+        }
+      }
+
+      // We cannnot merge two compares if the immediates are not same.
+      if (NewImm2 != NewImm1)
+        continue;
+    }
+
+    DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
+    DEBUG(CMPI1->dump());
+    DEBUG(BI1->dump());
+    DEBUG(CMPI2->dump());
+    DEBUG(BI2->dump());
+
+    // We adjust opcode, predicates and immediate as we determined above.
+    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
+      CMPI1->setDesc(TII->get(NewOpCode));
+    }
+    if (NewPredicate1) {
+      BI1->getOperand(0).setImm(NewPredicate1);
+    }
+    if (NewPredicate2) {
+      BI2->getOperand(0).setImm(NewPredicate2);
+    }
+    if (NewImm1 != Imm1) {
+      CMPI1->getOperand(2).setImm(NewImm1);
+    }
+
+    // We finally eliminate compare instruction in MBB2.
+    BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
+    BI2->getOperand(1).setIsKill(true);
+    BI1->getOperand(1).setIsKill(false);
+    CMPI2->eraseFromParent();
+
+    DEBUG(dbgs() << "into a compare and two branches:\n");
+    DEBUG(CMPI1->dump());
+    DEBUG(BI1->dump());
+    DEBUG(BI2->dump());
+
+    Simplified = true;
+  }
+
   return Simplified;
 }
 

Added: llvm/trunk/test/CodeGen/PowerPC/cmp_elimination.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/cmp_elimination.ll?rev=312514&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/cmp_elimination.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/cmp_elimination.ll Mon Sep  4 21:15:17 2017
@@ -0,0 +1,722 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s
+
+; Test cases for compare elimination in PPCMIPeephole pass
+
+define void @func1(i32 signext %a) {
+; We should have only one compare instruction
+; CHECK-LABEL: @func1
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp eq i32 %a, 100
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp slt i32 %a, 100
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func2(i32 signext %a) {
+; CHECK-LABEL: @func2
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp slt i32 %a, 100
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp eq i32 %a, 100
+  br i1 %cmp1, label %if.end3, label %if.then2
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func3(i32 signext %a) {
+; CHECK-LABEL: @func3
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp sgt i32 %a, 100
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp eq i32 %a, 100
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func4(i32 zeroext %a) {
+; CHECK-LABEL: @func4
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp eq i32 %a, 100
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp ult i32 %a, 100
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func5(i32 zeroext %a) {
+; CHECK-LABEL: @func5
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp ult i32 %a, 100
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp eq i32 %a, 100
+  br i1 %cmp1, label %if.end3, label %if.then2
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func6(i32 zeroext %a) {
+; CHECK-LABEL: @func6
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp ugt i32 %a, 100
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp eq i32 %a, 100
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func7(i64 %a) {
+; CHECK-LABEL: @func7
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp eq i64 %a, 100
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp slt i64 %a, 100
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func8(i64 %a) {
+; CHECK-LABEL: @func8
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp slt i64 %a, 100
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp eq i64 %a, 100
+  br i1 %cmp1, label %if.end3, label %if.then2
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func9(i64 %a) {
+; CHECK-LABEL: @func9
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp sgt i64 %a, 100
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp eq i64 %a, 100
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func10(i64 %a) {
+; CHECK-LABEL: @func10
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp eq i64 %a, 100
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp ult i64 %a, 100
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func11(i64 %a) {
+; CHECK-LABEL: @func11
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp ult i64 %a, 100
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp eq i64 %a, 100
+  br i1 %cmp1, label %if.end3, label %if.then2
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func12(i64 %a) {
+; CHECK-LABEL: @func12
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp ugt i64 %a, 100
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp eq i64 %a, 100
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func13(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: @func13
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp eq i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp slt i32 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func14(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: @func14
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp slt i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp sgt i32 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func15(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: @func15
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp slt i32 %b, %a
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp eq i32 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func16(i32 zeroext %a, i32 zeroext %b) {
+; CHECK-LABEL: @func16
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp eq i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp ult i32 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func17(i32 zeroext %a, i32 zeroext %b) {
+; CHECK-LABEL: @func17
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp ult i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp ugt i32 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func18(i32 zeroext %a, i32 zeroext %b) {
+; CHECK-LABEL: @func18
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp ult i32 %b, %a
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp eq i32 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func19(i64 %a, i64 %b) {
+; CHECK-LABEL: @func19
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp eq i64 %a, %b
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp slt i64 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func20(i64 %a, i64 %b) {
+; CHECK-LABEL: @func20
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp slt i64 %a, %b
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp sgt i64 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func21(i64 %a, i64 %b) {
+; CHECK-LABEL: @func21
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp slt i64 %b, %a
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp eq i64 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func22(i64 %a, i64 %b) {
+; CHECK-LABEL: @func22
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp eq i64 %a, %b
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp ult i64 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func23(i64 %a, i64 %b) {
+; CHECK-LABEL: @func23
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp ult i64 %a, %b
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp ugt i64 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func24(i64 %a, i64 %b) {
+; CHECK-LABEL: @func24
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp ult i64 %b, %a
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end3
+
+if.else:
+  %cmp1 = icmp eq i64 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+  tail call void @dummy2()
+  br label %if.end3
+
+if.end3:
+  ret void
+}
+
+
+define void @func25(i64 %a, i64 %b) {
+; CHECK-LABEL: @func25
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp slt i64 %b, %a
+  br i1 %cmp, label %if.then, label %if.else, !prof !1
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end6
+
+if.else:
+  %cmp2 = icmp eq i64 %a, %b
+  br i1 %cmp2, label %if.then4, label %if.else5
+
+if.then4:
+  tail call void @dummy2()
+  br label %if.end6
+
+if.else5:
+  tail call void @dummy3()
+  br label %if.end6
+
+if.end6:
+  ret void
+}
+
+
+define void @func26(i32 signext %a) {
+; CHECK-LABEL: @func26
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: blr
+entry:
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else, !prof !2
+
+if.then:
+  tail call void @dummy1()
+  br label %if.end9
+
+if.else:
+  %cmp2 = icmp eq i32 %a, 0
+  br i1 %cmp2, label %if.then7, label %if.else8, !prof !2
+
+if.then7:
+  tail call void @dummy2()
+  br label %if.end9
+
+if.else8:
+  tail call void @dummy3()
+  br label %if.end9
+
+if.end9:
+  ret void
+}
+
+ at g1 = external local_unnamed_addr global i32, align 4
+ at g2 = external local_unnamed_addr global i32, align 4
+
+define void @func27(i32 signext %a) {
+; CHECK-LABEL: @func27
+; CHECK: cmp
+; CHECK: beq
+; CHECK-NOT: cmp
+; CHECK: bgelr
+; CHECK: blr
+entry:
+  %cmp = icmp eq i32 %a, 0
+  br i1 %cmp, label %if.end3.sink.split, label %if.else
+
+if.else:
+  %cmp1 = icmp slt i32 %a, 0
+  br i1 %cmp1, label %if.end3.sink.split, label %if.end
+
+if.end3.sink.split:
+  %g2.sink = phi i32* [ @g2, %if.else ], [ @g1, %entry ]
+  store i32 0, i32* %g2.sink, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+declare void @dummy1()
+declare void @dummy2()
+declare void @dummy3()
+
+!1 = !{!"branch_weights", i32 2000, i32 1}
+!2 = !{!"branch_weights", i32 1, i32 2000}




More information about the llvm-commits mailing list