[llvm-branch-commits] [llvm] 6c3fa97 - [AArch64][GlobalISel] Select Bcc when it's better than TB(N)Z
Jessica Paquette via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Dec 1 15:50:10 PST 2020
Author: Jessica Paquette
Date: 2020-12-01T15:45:14-08:00
New Revision: 6c3fa97d8a628541c82d8981aabefcb2dcb29f17
URL: https://github.com/llvm/llvm-project/commit/6c3fa97d8a628541c82d8981aabefcb2dcb29f17
DIFF: https://github.com/llvm/llvm-project/commit/6c3fa97d8a628541c82d8981aabefcb2dcb29f17.diff
LOG: [AArch64][GlobalISel] Select Bcc when it's better than TB(N)Z
Instead of falling back to selecting TB(N)Z when we fail to select an
optimized compare against 0, select Bcc instead.
While we're here, also simplify selectCompareBranch a bit, since the logic was
hard to follow.
At -O0, this is a 0.1% geomean code size improvement for CTMark.
A simple example of where this can kick in is here:
https://godbolt.org/z/4rra6P
In the example above, GlobalISel currently produces a subs, cset, and tbnz.
SelectionDAG, on the other hand, just emits a compare and b.le.
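
For reference, a rough sketch of the kind of source that hits this path (the
exact code behind the godbolt link is not reproduced here, so this is an
assumption): a signed compare against zero feeding a conditional branch, where
neither TB(N)Z nor CB(N)Z applies.

  // Hypothetical reduced example; assumed to be similar in spirit to the
  // godbolt link above.
  extern void g();

  void f(int x) {
    // A signed compare against zero feeding a conditional branch. Per the
    // commit message, GlobalISel previously selected subs + cset + tbnz here;
    // with this patch it emits the compare plus a single Bcc (b.le),
    // matching SelectionDAG.
    if (x <= 0)
      g();
  }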
Differential Revision: https://reviews.llvm.org/D92358
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 6691bf068042..3dba92eea3d3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -257,6 +257,11 @@ class AArch64InstructionSelector : public InstructionSelector {
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
+ /// Emit a CB(N)Z instruction which branches to \p DestMBB.
+ MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
+ MachineBasicBlock *DestMBB,
+ MachineIRBuilder &MIB) const;
+
// Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
// We use these manually instead of using the importer since it doesn't
// support SDNodeXForm.
@@ -1394,9 +1399,7 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
// Only support EQ and NE. If we have LT, then it *is* possible to fold, but
// we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
// so folding would be redundant.
- if (Pred != CmpInst::Predicate::ICMP_EQ &&
- Pred != CmpInst::Predicate::ICMP_NE)
- return false;
+ assert(ICmpInst::isEquality(Pred) && "Expected only eq/ne?");
// Check if the AND has a constant on its RHS which we can use as a mask.
// If it's a power of 2, then it's the same as checking a specific bit.
@@ -1415,6 +1418,27 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
return true;
}
+MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
+ bool IsNegative,
+ MachineBasicBlock *DestMBB,
+ MachineIRBuilder &MIB) const {
+ assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
+ AArch64::GPRRegBankID &&
+ "Expected GPRs only?");
+ auto Ty = MRI.getType(CompareReg);
+ unsigned Width = Ty.getSizeInBits();
+ assert(!Ty.isVector() && "Expected scalar only?");
+ assert(Width <= 64 && "Expected width to be at most 64?");
+ static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
+ {AArch64::CBNZW, AArch64::CBNZX}};
+ unsigned Opc = OpcTable[IsNegative][Width == 64];
+ auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
+ constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
+ return &*BranchMI;
+}
+
bool AArch64InstructionSelector::selectCompareBranch(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
@@ -1477,51 +1501,39 @@ bool AArch64InstructionSelector::selectCompareBranch(
}
}
- if (!VRegAndVal) {
- std::swap(RHS, LHS);
- VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
- LHSMI = getDefIgnoringCopies(LHS, MRI);
- }
+ // Attempt to handle commutative condition codes. Right now, that's only
+ // eq/ne.
+ if (ICmpInst::isEquality(Pred)) {
+ if (!VRegAndVal) {
+ std::swap(RHS, LHS);
+ VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ LHSMI = getDefIgnoringCopies(LHS, MRI);
+ }
- if (!VRegAndVal || VRegAndVal->Value != 0) {
- // If we can't select a CBZ then emit a cmp + Bcc.
- auto Pred =
- static_cast<CmpInst::Predicate>(CCMI->getOperand(1).getPredicate());
- emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
- CCMI->getOperand(1), MIB);
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
- I.eraseFromParent();
- return true;
- }
+ if (VRegAndVal && VRegAndVal->Value == 0) {
+ // If there's a G_AND feeding into this branch, try to fold it away by
+ // emitting a TB(N)Z instead.
+ if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
+ MIB)) {
+ I.eraseFromParent();
+ return true;
+ }
- // Try to emit a TB(N)Z for an eq or ne condition.
- if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
- MIB)) {
- I.eraseFromParent();
- return true;
+ // Otherwise, try to emit a CB(N)Z instead.
+ auto LHSTy = MRI.getType(LHS);
+ if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
+ emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
+ I.eraseFromParent();
+ return true;
+ }
+ }
}
- const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
- if (RB.getID() != AArch64::GPRRegBankID)
- return false;
- if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
- return false;
-
- const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
- unsigned CBOpc = 0;
- if (CmpWidth <= 32)
- CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
- else if (CmpWidth == 64)
- CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
- else
- return false;
-
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
- .addUse(LHS)
- .addMBB(DestMBB)
- .constrainAllUses(TII, TRI, RBI);
-
+ // Couldn't optimize. Emit a compare + bcc.
+ emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
+ CCMI->getOperand(1), MIB);
+ const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
I.eraseFromParent();
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
index bb6ba25d06f7..154f00b96de3 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
@@ -143,8 +143,7 @@ body: |
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri [[COPY]], 0, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
- ; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
+ ; CHECK: Bcc 11, %bb.1, implicit $nzcv
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
@@ -176,8 +175,7 @@ body: |
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri [[COPY]], 0, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
- ; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
+ ; CHECK: Bcc 12, %bb.1, implicit $nzcv
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
index 2be18832a0e5..d8f962cdfb76 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
@@ -100,8 +100,7 @@ body: |
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: %copy:gpr64 = COPY $x0
; CHECK: [[ANDSXri:%[0-9]+]]:gpr64 = ANDSXri %copy, 8000, implicit-def $nzcv
- ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
- ; CHECK: TBNZW %cmp, 0, %bb.1
+ ; CHECK: Bcc 11, %bb.1, implicit $nzcv
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
@@ -133,8 +132,7 @@ body: |
; CHECK: %copy:gpr64 = COPY $x0
; CHECK: %zero:gpr64 = COPY $xzr
; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %zero, %copy, implicit-def $nzcv
- ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
- ; CHECK: TBNZW %cmp, 0, %bb.1
+ ; CHECK: Bcc 11, %bb.1, implicit $nzcv
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR