[llvm] r218693 - Recommit r218010 [FastISel][AArch64] Fold bit test and branch into TBZ and TBNZ.

Juergen Ributzka juergen at apple.com
Tue Sep 30 12:59:36 PDT 2014


Author: ributzka
Date: Tue Sep 30 14:59:35 2014
New Revision: 218693

URL: http://llvm.org/viewvc/llvm-project?rev=218693&view=rev
Log:
Recommit r218010 [FastISel][AArch64] Fold bit test and branch into TBZ and TBNZ.

Note: This version fixed an issue with the TBZ/TBNZ instructions that were
generated in FastISel. The issue was that the 64bit version of TBZ (TBZX)
automagically sets the upper bit of the immediate field that is used to specify
the bit we want to test. To test for any of the lower 32bits we have to first
extract the subregister and use the 32bit version of the TBZ instruction (TBZW).

Original commit message:
Teach selectBranch to fold bit test and branch into a single instruction (TBZ or
TBNZ).

Added:
    llvm/trunk/test/CodeGen/AArch64/fast-isel-tbz.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp

Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=218693&r1=218692&r2=218693&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Tue Sep 30 14:59:35 2014
@@ -173,6 +173,7 @@ private:
                          bool WantResult = true);
 
   // Emit functions.
+  bool emitCompareAndBranch(const BranchInst *BI);
   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
@@ -1927,30 +1928,125 @@ static AArch64CC::CondCode getCompareCC(
   }
 }
 
-/// \brief Check if the comparison against zero and the following branch can be
-/// folded into a single instruction (CBZ or CBNZ).
-static bool canFoldZeroIntoBranch(const CmpInst *CI) {
-  CmpInst::Predicate Predicate = CI->getPredicate();
-  if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE))
-    return false;
+/// \brief Try to emit a combined compare-and-branch instruction.
+bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
+  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
+  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
 
-  Type *Ty = CI->getOperand(0)->getType();
-  if (!Ty->isIntegerTy())
-    return false;
+  const Value *LHS = CI->getOperand(0);
+  const Value *RHS = CI->getOperand(1);
+
+  Type *Ty = LHS->getType();
+    if (!Ty->isIntegerTy())
+      return false;
 
   unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
   if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
     return false;
 
-  if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(0)))
-    if (C->isNullValue())
-      return true;
+  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 
-  if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(1)))
-    if (C->isNullValue())
-      return true;
+  // Try to take advantage of fallthrough opportunities.
+  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+    std::swap(TBB, FBB);
+    Predicate = CmpInst::getInversePredicate(Predicate);
+  }
 
-  return false;
+  int TestBit = -1;
+  bool IsCmpNE;
+  if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
+    if (const auto *C = dyn_cast<ConstantInt>(LHS))
+      if (C->isNullValue())
+        std::swap(LHS, RHS);
+
+    if (!isa<ConstantInt>(RHS))
+      return false;
+
+    if (!cast<ConstantInt>(RHS)->isNullValue())
+      return false;
+
+    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
+      if (AI->getOpcode() == Instruction::And) {
+        const Value *AndLHS = AI->getOperand(0);
+        const Value *AndRHS = AI->getOperand(1);
+
+        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
+          if (C->getValue().isPowerOf2())
+            std::swap(AndLHS, AndRHS);
+
+        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
+          if (C->getValue().isPowerOf2()) {
+            TestBit = C->getValue().logBase2();
+            LHS = AndLHS;
+          }
+      }
+    IsCmpNE = Predicate == CmpInst::ICMP_NE;
+  } else if (Predicate == CmpInst::ICMP_SLT) {
+    if (!isa<ConstantInt>(RHS))
+      return false;
+
+    if (!cast<ConstantInt>(RHS)->isNullValue())
+      return false;
+
+    TestBit = BW - 1;
+    IsCmpNE = true;
+  } else if (Predicate == CmpInst::ICMP_SGT) {
+    if (!isa<ConstantInt>(RHS))
+      return false;
+
+    if (cast<ConstantInt>(RHS)->getValue() != -1)
+      return false;
+
+    TestBit = BW - 1;
+    IsCmpNE = false;
+  } else
+    return false;
+
+  static const unsigned OpcTable[2][2][2] = {
+    { {AArch64::CBZW,  AArch64::CBZX },
+      {AArch64::CBNZW, AArch64::CBNZX} },
+    { {AArch64::TBZW,  AArch64::TBZX },
+      {AArch64::TBNZW, AArch64::TBNZX} }
+  };
+
+  bool IsBitTest = TestBit != -1;
+  bool Is64Bit = BW == 64;
+  if (TestBit < 32 && TestBit >= 0)
+    Is64Bit = false;
+  
+  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
+  const MCInstrDesc &II = TII.get(Opc);
+
+  unsigned SrcReg = getRegForValue(LHS);
+  if (!SrcReg)
+    return false;
+  bool SrcIsKill = hasTrivialKill(LHS);
+
+  if (BW == 64 && !Is64Bit) {
+    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
+                                        AArch64::sub_32);
+    SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
+  }
+
+  // Emit the combined compare and branch instruction.
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+          .addReg(SrcReg, getKillRegState(SrcIsKill));
+  if (IsBitTest)
+    MIB.addImm(TestBit);
+  MIB.addMBB(TBB);
+
+  // Obtain the branch weight and add the TrueBB to the successor list.
+  uint32_t BranchWeight = 0;
+  if (FuncInfo.BPI)
+    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+                                               TBB->getBasicBlock());
+  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+  fastEmitBranch(FBB, DbgLoc);
+
+  return true;
 }
 
 bool AArch64FastISel::selectBranch(const Instruction *I) {
@@ -1980,50 +2076,16 @@ bool AArch64FastISel::selectBranch(const
         return true;
       }
 
+      // Try to emit a combined compare-and-branch first.
+      if (emitCompareAndBranch(BI))
+        return true;
+
       // Try to take advantage of fallthrough opportunities.
       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
         std::swap(TBB, FBB);
         Predicate = CmpInst::getInversePredicate(Predicate);
       }
 
-      // Try to optimize comparisons against zero.
-      if (canFoldZeroIntoBranch(CI)) {
-        const Value *LHS = CI->getOperand(0);
-        const Value *RHS = CI->getOperand(1);
-
-        // Canonicalize zero values to the RHS.
-        if (const auto *C = dyn_cast<ConstantInt>(LHS))
-          if (C->isNullValue())
-            std::swap(LHS, RHS);
-
-        static const unsigned OpcTable[2][2] = {
-          {AArch64::CBZW,  AArch64::CBZX }, {AArch64::CBNZW, AArch64::CBNZX}
-        };
-        bool IsCmpNE = Predicate == CmpInst::ICMP_NE;
-        bool Is64Bit = LHS->getType()->isIntegerTy(64);
-        unsigned Opc = OpcTable[IsCmpNE][Is64Bit];
-
-        unsigned SrcReg = getRegForValue(LHS);
-        if (!SrcReg)
-          return false;
-        bool SrcIsKill = hasTrivialKill(LHS);
-
-        // Emit the combined compare and branch instruction.
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
-            .addReg(SrcReg, getKillRegState(SrcIsKill))
-            .addMBB(TBB);
-
-        // Obtain the branch weight and add the TrueBB to the successor list.
-        uint32_t BranchWeight = 0;
-        if (FuncInfo.BPI)
-          BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
-                                                     TBB->getBasicBlock());
-        FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
-        fastEmitBranch(FBB, DbgLoc);
-        return true;
-      }
-
       // Emit the cmp.
       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
         return false;

Added: llvm/trunk/test/CodeGen/AArch64/fast-isel-tbz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-tbz.ll?rev=218693&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fast-isel-tbz.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/fast-isel-tbz.ll Tue Sep 30 14:59:35 2014
@@ -0,0 +1,125 @@
+; RUN: llc                             -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+
+define i32 @icmp_eq_i8(i8 zeroext %a) {
+; CHECK-LABEL: icmp_eq_i8
+; CHECK:       tbz {{w[0-9]+}}, #0, {{LBB.+_2}}
+  %1 = and i8 %a, 1
+  %2 = icmp eq i8 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i16(i16 zeroext %a) {
+; CHECK-LABEL: icmp_eq_i16
+; CHECK:       tbz w0, #1, {{LBB.+_2}}
+  %1 = and i16 %a, 2
+  %2 = icmp eq i16 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i32(i32 %a) {
+; CHECK-LABEL: icmp_eq_i32
+; CHECK:       tbz w0, #2, {{LBB.+_2}}
+  %1 = and i32 %a, 4
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i64_1(i64 %a) {
+; CHECK-LABEL: icmp_eq_i64_1
+; CHECK:       tbz w0, #3, {{LBB.+_2}}
+  %1 = and i64 %a, 8
+  %2 = icmp eq i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i64_2(i64 %a) {
+; CHECK-LABEL: icmp_eq_i64_2
+; CHECK:       tbz x0, #32, {{LBB.+_2}}
+  %1 = and i64 %a, 4294967296
+  %2 = icmp eq i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i8(i8 zeroext %a) {
+; CHECK-LABEL: icmp_ne_i8
+; CHECK:       tbnz w0, #0, {{LBB.+_2}}
+  %1 = and i8 %a, 1
+  %2 = icmp ne i8 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i16(i16 zeroext %a) {
+; CHECK-LABEL: icmp_ne_i16
+; CHECK:       tbnz w0, #1, {{LBB.+_2}}
+  %1 = and i16 %a, 2
+  %2 = icmp ne i16 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i32(i32 %a) {
+; CHECK-LABEL: icmp_ne_i32
+; CHECK:       tbnz w0, #2, {{LBB.+_2}}
+  %1 = and i32 %a, 4
+  %2 = icmp ne i32 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i64_1(i64 %a) {
+; CHECK-LABEL: icmp_ne_i64_1
+; CHECK:       tbnz w0, #3, {{LBB.+_2}}
+  %1 = and i64 %a, 8
+  %2 = icmp ne i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i64_2(i64 %a) {
+; CHECK-LABEL: icmp_ne_i64_2
+; CHECK:       tbnz x0, #32, {{LBB.+_2}}
+  %1 = and i64 %a, 4294967296
+  %2 = icmp ne i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 0, i32 2147483647}
+!1 = metadata !{metadata !"branch_weights", i32 2147483647, i32 0}





More information about the llvm-commits mailing list