[llvm] c8c987d - [AArch64][GlobalISel] Fold in G_ANYEXT/G_ZEXT into TB(N)Z

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 30 14:51:38 PST 2020


Author: Jessica Paquette
Date: 2020-01-30T14:51:26-08:00
New Revision: c8c987d3105a9bc9bab241946ec96c36783ed120

URL: https://github.com/llvm/llvm-project/commit/c8c987d3105a9bc9bab241946ec96c36783ed120
DIFF: https://github.com/llvm/llvm-project/commit/c8c987d3105a9bc9bab241946ec96c36783ed120.diff

LOG: [AArch64][GlobalISel] Fold in G_ANYEXT/G_ZEXT into TB(N)Z

This is similar to the code in getTestBitOperand in AArch64ISelLowering. Instead
of implementing all of the TB(N)Z optimizations at once, this patch implements
the simplest case first. The way this is set up should make it fairly easy to
add the rest as we go along.

The idea here is that after determining that we can use a TB(N)Z, we can
continue looking through instructions and perform further folding.

In this case, when we have a G_ZEXT or G_ANYEXT where the extended bits are not
used, we can fold it into the TB(N)Z.
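
For illustration, here is the pattern from the fold_zext test added below, trimmed
to the relevant instructions. The G_ZEXT result is only used by the G_AND, so the
extended bits are never inspected:

    %copy:gpr(s32) = COPY $w0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0
    %fold_me:gpr(s64) = G_ZEXT %copy(s32)
    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero

Since the mask selects a single bit (bit 3), instruction selection can test the
original 32-bit register directly rather than the zero-extended value:

    TBNZW %copy, 3, %bb.1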

Differential Revision: https://reviews.llvm.org/D73673

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index cf3dc78ff505..b398da90128e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -990,6 +990,27 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
   }
 }
 
+/// Return a register which can be used as a bit to test in a TB(N)Z.
+static Register getTestBitReg(Register Reg, MachineRegisterInfo &MRI) {
+  assert(Reg.isValid() && "Expected valid register!");
+  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
+    unsigned Opc = MI->getOpcode();
+    Register NextReg;
+
+    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
+    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT)
+      NextReg = MI->getOperand(1).getReg();
+
+    // Did we find something worth folding?
+    if (!NextReg.isValid() || !MRI.hasOneUse(NextReg))
+      break;
+
+    // NextReg is worth folding. Keep looking.
+    Reg = NextReg;
+  }
+  return Reg;
+}
+
 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
     MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
     MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
@@ -1018,7 +1039,6 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
     return false;
 
   MachineRegisterInfo &MRI = *MIB.getMRI();
-  Register TestReg = AndInst->getOperand(1).getReg();
 
   // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
   // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
@@ -1034,7 +1054,11 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
       getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
   if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
     return false;
+
+  // Try to optimize the TB(N)Z.
   uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
+  Register TestReg = AndInst->getOperand(1).getReg();
+  TestReg = getTestBitReg(TestReg, MRI);
 
   // Choose the correct TB(N)Z opcode to use.
   unsigned Opc = 0;

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
new file mode 100644
index 000000000000..453cef564f40
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
@@ -0,0 +1,136 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Check that we can continue matching when we are in a situation where we will
+# emit a TB(N)Z.
+...
+---
+name:            fold_zext
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: fold_zext
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $x0
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   TBNZW %copy, 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %fold_me:gpr(s64) = G_ZEXT %copy(s32)
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+---
+name:            fold_anyext
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: fold_anyext
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $x0
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   TBNZW %copy, 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %fold_me:gpr(s64) = G_ANYEXT %copy(s32)
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+---
+name:            fold_multiple
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: fold_multiple
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $h0
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
+  ; CHECK:   %copy:gpr32all = COPY [[SUBREG_TO_REG]]
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY %copy
+  ; CHECK:   TBNZW [[COPY]], 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $h0
+    %copy:gpr(s16) = COPY $h0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %ext1:gpr(s32) = G_ZEXT %copy(s16)
+    %ext2:gpr(s64) = G_ANYEXT %ext1(s32)
+    %and:gpr(s64) = G_AND %ext2, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+---
+name:            dont_fold_more_than_one_use
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_more_than_one_use
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $x0
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %copy, %subreg.sub_32
+  ; CHECK:   %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+  ; CHECK:   TBNZW %copy, 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   $x0 = COPY %zext
+  ; CHECK:   RET_ReallyLR implicit $x0
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %zext:gpr(s64) = G_ZEXT %copy(s32)
+    %and:gpr(s64) = G_AND %zext, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    $x0 = COPY %zext:gpr(s64)
+    RET_ReallyLR implicit $x0
