[llvm] dba29f7 - [AArch64][GlobalISel] Fold G_AND into G_BRCOND

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 28 14:02:17 PST 2020


Author: Jessica Paquette
Date: 2020-01-28T14:00:31-08:00
New Revision: dba29f7c3b36443c7287ba5a31d166f8edb73544

URL: https://github.com/llvm/llvm-project/commit/dba29f7c3b36443c7287ba5a31d166f8edb73544
DIFF: https://github.com/llvm/llvm-project/commit/dba29f7c3b36443c7287ba5a31d166f8edb73544.diff

LOG: [AArch64][GlobalISel] Fold G_AND into G_BRCOND

When the G_BRCOND is fed by an eq or ne G_ICMP, it may be possible to fold a
G_AND into the branch by producing a tbnz/tbz instead.

This happens when:

  1. We have a ne/eq G_ICMP feeding into the G_BRCOND
  2. The G_ICMP is a comparison against 0
  3. One of the operands of the G_AND is a power of 2 constant

This is very similar to the code in AArch64TargetLowering::LowerBR_CC.
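
For illustration, here is a hypothetical C++ function that gives rise to
this pattern (the function and its name are made up for this note; the
patch does not depend on them):

  // (x & 8) != 0 tests bit 3, so with this fold the branch can be
  // selected as a single TBNZ on bit 3 of x instead of an AND, a
  // compare against zero, and a conditional branch.
  bool bit3_set(unsigned x) {
    if (x & 8)
      return true;
    return false;
  }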

Add opt-and-tbnz-tbz.mir to test this.

Differential Revision: https://reviews.llvm.org/D73573

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index a943d25d895d..bfa329b1fe94 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -89,6 +89,11 @@ class AArch64InstructionSelector : public InstructionSelector {
   bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                            MachineRegisterInfo &MRI) const;
 
+  bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
+                                  int64_t CmpConstant,
+                                  const CmpInst::Predicate &Pred,
+                                  MachineBasicBlock *DstMBB,
+                                  MachineIRBuilder &MIB) const;
   bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                            MachineRegisterInfo &MRI) const;
 
@@ -983,6 +988,64 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
   }
 }
 
+bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
+    MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
+    MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
+  // Given something like this:
+  //
+  //  %x = ...Something...
+  //  %one = G_CONSTANT i64 1
+  //  %zero = G_CONSTANT i64 0
+  //  %and = G_AND %x, %one
+  //  %cmp = G_ICMP intpred(ne), %and, %zero
+  //  %cmp_trunc = G_TRUNC %cmp
+  //  G_BRCOND %cmp_trunc, %bb.3
+  //
+  // We want to try to fold the AND into the G_BRCOND and produce either a
+  // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
+  //
+  // In this case, we'd get
+  //
+  // TBNZ %x, 0, %bb.3
+  //
+  if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
+    return false;
+
+  // Need to be comparing against 0 to fold.
+  if (CmpConstant != 0)
+    return false;
+
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  unsigned Opc = 0;
+  Register TestReg = AndInst->getOperand(1).getReg();
+  unsigned TestSize = MRI.getType(TestReg).getSizeInBits();
+
+  // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
+  // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
+  // so folding would be redundant.
+  if (Pred == CmpInst::Predicate::ICMP_EQ)
+    Opc = TestSize == 32 ? AArch64::TBZW : AArch64::TBZX;
+  else if (Pred == CmpInst::Predicate::ICMP_NE)
+    Opc = TestSize == 32 ? AArch64::TBNZW : AArch64::TBNZX;
+  else
+    return false;
+
+  // Check if the AND has a constant on its RHS which we can use as a mask.
+  // If it's a power of 2, then it's the same as checking a specific bit.
+  // (e.g., ANDing with 8 == ANDing with 0b1000 == testing if bit 3 is set)
+  auto MaybeBit =
+      getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
+  if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
+    return false;
+  uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
+
+  // Construct the branch.
+  auto BranchMI =
+      MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
+  constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
+  return true;
+}
+
 bool AArch64InstructionSelector::selectCompareBranch(
     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
 
@@ -1000,9 +1063,9 @@ bool AArch64InstructionSelector::selectCompareBranch(
   if (!VRegAndVal)
     std::swap(RHS, LHS);
 
+  MachineIRBuilder MIB(I);
   VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
   if (!VRegAndVal || VRegAndVal->Value != 0) {
-    MachineIRBuilder MIB(I);
     // If we can't select a CBZ then emit a cmp + Bcc.
     if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
                             CCMI->getOperand(1), MIB))
@@ -1014,11 +1077,18 @@ bool AArch64InstructionSelector::selectCompareBranch(
     return true;
   }
 
+  // Try to fold things into the branch.
+  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
+  MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
+  if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
+                                 MIB)) {
+    I.eraseFromParent();
+    return true;
+  }
+
   const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
   if (RB.getID() != AArch64::GPRRegBankID)
     return false;
-
-  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
   if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
     return false;
 

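As a standalone sketch of the mask-to-bit-index step in
tryOptAndIntoCompareBranch above (plain C++, deliberately independent of
LLVM; the patch itself uses isPowerOf2_64 and Log2_64 from
llvm/Support/MathExtras.h for the same computation):

  #include <cstdint>
  #include <cstdio>

  // Returns the bit index tested by (x & Mask) != 0 when Mask has
  // exactly one bit set, or -1 when Mask is not a power of two and
  // the fold does not apply.
  static int maskToBitIndex(uint64_t Mask) {
    if (Mask == 0 || (Mask & (Mask - 1)) != 0)
      return -1; // zero bits or more than one bit set
    int Bit = 0;
    while ((Mask >>= 1) != 0)
      ++Bit;
    return Bit;
  }

  int main() {
    printf("%d\n", maskToBitIndex(8));  // 3: ANDing with 8 tests bit 3
    printf("%d\n", maskToBitIndex(16)); // 4: matches tbz_and_s64 below
    printf("%d\n", maskToBitIndex(7));  // -1: not a power of 2, no fold
    return 0;
  }
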
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
new file mode 100644
index 000000000000..525bbe588142
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
@@ -0,0 +1,257 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Verify that we can fold G_AND into G_BRCOND when all of the following hold:
+#   1. We have a ne/eq G_ICMP feeding into the G_BRCOND
+#   2. The G_ICMP is a comparison against 0
+#   3. One of the operands of the G_AND is a power of 2
+#
+# If all of these hold, we should produce a tbnz or a tbz.
+...
+---
+name:            tbnz_and_s64
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: tbnz_and_s64
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK:   TBNZX [[COPY]], 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 8 ; Power of 2 => TBNZ tests bit 3
+    %3:gpr(s64) = G_CONSTANT i64 0
+    %2:gpr(s64) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            tbz_and_s64
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: tbz_and_s64
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK:   TBZX [[COPY]], 4, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 16 ; Power of 2 => TBZ tests bit 4
+    %3:gpr(s64) = G_CONSTANT i64 0
+    %2:gpr(s64) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(eq), %2(s64), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            tbnz_and_s32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: tbnz_and_s32
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $w0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK:   TBNZW [[COPY]], 0, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = G_CONSTANT i32 1 ; Power of 2 => TBNZ tests bit 0
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %2:gpr(s32) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(ne), %2(s32), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            tbz_and_s32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: tbz_and_s32
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $w0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK:   TBZW [[COPY]], 0, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = G_CONSTANT i32 1 ; Power of 2 => TBZ tests bit 0
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %2:gpr(s32) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(eq), %2(s32), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            dont_fold_and_lt
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_and_lt
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $w0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK:   $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
+  ; CHECK:   [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+  ; CHECK:   TBNZW [[CSINCWr]], 0, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = G_CONSTANT i32 1
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %2:gpr(s32) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(slt), %2(s32), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            dont_fold_and_gt
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_and_gt
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $w0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK:   $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
+  ; CHECK:   [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+  ; CHECK:   TBNZW [[CSINCWr]], 0, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = G_CONSTANT i32 1
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %2:gpr(s32) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(sgt), %2(s32), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            dont_fold_and_not_power_of_2
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_and_not_power_of_2
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK:   [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[COPY]], 4098
+  ; CHECK:   CBNZX [[ANDXri]], %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 7
+    %3:gpr(s64) = G_CONSTANT i64 0
+    %2:gpr(s64) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            dont_fold_cmp_not_0
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_cmp_not_0
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK:   [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[COPY]], 8064
+  ; CHECK:   $xzr = SUBSXri [[ANDXri]], 4, 0, implicit-def $nzcv
+  ; CHECK:   Bcc 1, %bb.1, implicit $nzcv
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %3:gpr(s64) = G_CONSTANT i64 4
+    %2:gpr(s64) = G_AND %0, %1
+    %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
+    %4:gpr(s1) = G_TRUNC %5(s32)
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
