[llvm] 2bd4644 - [AArch64][GlobalISel] Walk through G_AND in TB(N)Z bit calculation

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 3 11:53:55 PST 2020


Author: Jessica Paquette
Date: 2020-02-03T11:53:47-08:00
New Revision: 2bd46444d73436b121c4accd7d2f21e1fa7229ae

URL: https://github.com/llvm/llvm-project/commit/2bd46444d73436b121c4accd7d2f21e1fa7229ae
DIFF: https://github.com/llvm/llvm-project/commit/2bd46444d73436b121c4accd7d2f21e1fa7229ae.diff

LOG: [AArch64][GlobalISel] Walk through G_AND in TB(N)Z bit calculation

Given

```
tb(n)z (and x, m), b
```

where the `b`-th bit of `m` is 1, the following equivalence holds:

```
tb(n)z (and x, m), b == tb(n)z x, b
```

So, we can walk past a `G_AND` in this case.
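
To make the reasoning concrete, here is a minimal standalone C++ sketch (not LLVM code; the helper `bitIsSet` and the constants are illustrative only) of the bit identity the fold relies on. The mask values mirror the `fold_and_rhs` and `dont_fold_and` cases in the new test below.

```
#include <cassert>
#include <cstdint>

// When bit `B` of the mask `M` is set, bit `B` of (X & M) equals bit `B` of X,
// so a TB(N)Z on (and x, m) can test x directly. When the mask bit is clear,
// bit `B` of (X & M) is always 0 and walking through the AND would be wrong.
static bool bitIsSet(uint64_t V, unsigned B) { return (V >> B) & 1; }

int main() {
  const unsigned B = 3;
  const uint64_t GoodMask = 8; // bit 3 set, as in %fold_cst in fold_and_rhs
  const uint64_t BadMask = 7;  // bit 3 clear, as in dont_fold_and
  for (uint64_t X : {UINT64_C(0), UINT64_C(8), UINT64_C(0xff), UINT64_C(0xdeadbeef)}) {
    assert(bitIsSet(X & GoodMask, B) == bitIsSet(X, B)); // safe to walk through
    assert(!bitIsSet(X & BadMask, B));                   // always 0: do not fold
  }
  return 0;
}
```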

Also add test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir to test this.

Differential Revision: https://reviews.llvm.org/D73790

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 33d8a0817b0d..d51157ed724f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -991,27 +991,72 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
 }
 
 /// Return a register which can be used as a bit to test in a TB(N)Z.
-static Register getTestBitReg(Register Reg, MachineRegisterInfo &MRI) {
+static Register getTestBitReg(Register Reg, uint64_t Bit,
+                              MachineRegisterInfo &MRI) {
   assert(Reg.isValid() && "Expected valid register!");
   while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
     unsigned Opc = MI->getOpcode();
-    Register NextReg;
-
     // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
     //
     // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
     // on the truncated x is the same as the bit number on x.
     if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
-        Opc == TargetOpcode::G_TRUNC)
-      NextReg = MI->getOperand(1).getReg();
+        Opc == TargetOpcode::G_TRUNC) {
+      Register NextReg = MI->getOperand(1).getReg();
+      // Did we find something worth folding?
+      if (!NextReg.isValid() || !MRI.hasOneUse(NextReg))
+        break;
+
+      // NextReg is worth folding. Keep looking.
+      Reg = NextReg;
+      continue;
+    }
 
-    // Did we find something worth folding?
-    if (!NextReg.isValid() || !MRI.hasOneUse(NextReg))
+    // Attempt to find a suitable operation with a constant on one side.
+    Optional<uint64_t> C;
+    Register TestReg;
+    switch (Opc) {
+    default:
+      break;
+    case TargetOpcode::G_AND: {
+      TestReg = MI->getOperand(1).getReg();
+      Register ConstantReg = MI->getOperand(2).getReg();
+      auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+      if (!VRegAndVal) {
+        // AND commutes, check the other side for a constant.
+        // FIXME: Can we canonicalize the constant so that it's always on the
+        // same side at some point earlier?
+        std::swap(ConstantReg, TestReg);
+        VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+      }
+      if (VRegAndVal)
+        C = VRegAndVal->Value;
+    }
+    }
+
+    // Didn't find a constant. Bail out of the loop.
+    if (!C)
       break;
 
-    // NextReg is worth folding. Keep looking.
+    // We found a suitable instruction with a constant. Check to see if we can
+    // walk through the instruction.
+    Register NextReg;
+    switch (Opc) {
+    default:
+      break;
+    case TargetOpcode::G_AND:
+      // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
+      if ((*C >> Bit) & 1)
+        NextReg = TestReg;
+      break;
+    }
+
+    // Check if we found anything worth folding.
+    if (!NextReg.isValid() || !MRI.hasOneUse(NextReg))
+      break;
     Reg = NextReg;
   }
+
   return Reg;
 }
 
@@ -1062,7 +1107,7 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
   // Try to optimize the TB(N)Z.
   uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
   Register TestReg = AndInst->getOperand(1).getReg();
-  TestReg = getTestBitReg(TestReg, MRI);
+  TestReg = getTestBitReg(TestReg, Bit, MRI);
 
   // Choose the correct TB(N)Z opcode to use.
   unsigned Opc = 0;

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir
new file mode 100644
index 000000000000..bd65bfc01741
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-and-tbz-tbnz.mir
@@ -0,0 +1,113 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Check folding an AND into a G_BRCOND which has been matched as a TB(N)Z.
+...
+---
+name:            fold_and_rhs
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: fold_and_rhs
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64all = COPY $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32all = COPY %copy.sub_32
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+  ; CHECK:   TBNZW [[COPY1]], 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %fold_cst:gpr(s64) = G_CONSTANT i64 8
+
+    ; tbnz (and x, 8), 3 == tbnz x, 3 because the third bit of x & 8 is 1 when
+    ; the third bit of x is 1.
+    %fold_me:gpr(s64) = G_AND %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+---
+name:            fold_and_lhs
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: fold_and_lhs
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64all = COPY $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32all = COPY %copy.sub_32
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+  ; CHECK:   TBNZW [[COPY1]], 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %fold_cst:gpr(s64) = G_CONSTANT i64 8
+
+    ; Same as above, but with the constant on the other side.
+    %fold_me:gpr(s64) = G_AND %fold_cst, %copy
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+---
+name:            dont_fold_and
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_and
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64 = COPY $x0
+  ; CHECK:   %fold_me:gpr64sp = ANDXri %copy, 4098
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+  ; CHECK:   TBNZW [[COPY2]], 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; tbnz (and x, 7), 3 != tbnz x, 3, because the third bit of x & 7 is always
+    ; zero.
+    %fold_cst:gpr(s64) = G_CONSTANT i64 7
+
+    %fold_me:gpr(s64) = G_AND %copy, %fold_cst
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR


        

