[llvm] 7212f65 - [AArch64][GlobalISel] Fold G_LSHR into test bit calculation

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 5 15:14:30 PST 2020


Author: Jessica Paquette
Date: 2020-02-05T15:14:12-08:00
New Revision: 7212f65784c12cd2e01c909b43e2c4c597637195

URL: https://github.com/llvm/llvm-project/commit/7212f65784c12cd2e01c909b43e2c4c597637195
DIFF: https://github.com/llvm/llvm-project/commit/7212f65784c12cd2e01c909b43e2c4c597637195.diff

LOG: [AArch64][GlobalISel] Fold G_LSHR into test bit calculation

Add support for walking through G_LSHR in `getTestBitReg`. This is equivalent
to the corresponding handling in `getTestBitOperand` in AArch64ISelLowering.

```
(tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
```
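
As a rough illustration, the walk only folds the shift when the adjusted bit
index still addresses a bit inside the shift's source register; a minimal
standalone sketch of that guard is below (`foldLshrIntoTestBit` is a
hypothetical name, not the selector's actual code, which updates `Bit` in
place inside `getTestBitReg`):

```
// Minimal sketch, assuming the tested bit index (Bit) and the constant shift
// amount (ShiftAmt) have already been extracted.
#include <cstdint>

static bool foldLshrIntoTestBit(uint64_t &Bit, uint64_t ShiftAmt,
                                unsigned TestRegSize) {
  // (tbz (lshr x, c), b) -> (tbz x, b + c) is only safe while b + c still
  // names a bit of x; beyond that, the lshr only shifts in zeros.
  if (Bit + ShiftAmt >= TestRegSize)
    return false;
  Bit += ShiftAmt;
  return true;
}
```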

Differential Revision: https://reviews.llvm.org/D74077

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index f933db55cc29..1013607839e6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1040,6 +1040,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
       break;
     }
     case TargetOpcode::G_ASHR:
+    case TargetOpcode::G_LSHR:
     case TargetOpcode::G_SHL: {
       TestReg = MI->getOperand(1).getReg();
       auto VRegAndVal =
@@ -1082,6 +1083,13 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
       if (Bit >= TestRegSize)
         Bit = TestRegSize - 1;
       break;
+    case TargetOpcode::G_LSHR:
+      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
+      if ((Bit + *C) < TestRegSize) {
+        NextReg = TestReg;
+        Bit = Bit + *C;
+      }
+      break;
     case TargetOpcode::G_XOR:
       // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
       // appropriate.
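
For reference, here is a small worked check of the `(Bit + *C) < TestRegSize`
guard above, using the constants from the MIR tests added below. This is a
standalone sketch, not part of the patch; `canFoldLshr` is a hypothetical
helper that mirrors the guard.

```
// Standalone sketch: exercises the fold condition with the constants used by
// the fold_lshr, fold_lshr_2 and dont_fold_lshr tests.
#include <cassert>
#include <cstdint>

static bool canFoldLshr(uint64_t Bit, uint64_t ShiftAmt, unsigned TestRegSize) {
  return Bit + ShiftAmt < TestRegSize;
}

int main() {
  assert(canFoldLshr(3, 1, 32));   // fold_lshr:      bit 3 becomes bit 4 (s32)
  assert(canFoldLshr(3, 29, 64));  // fold_lshr_2:    bit 3 becomes bit 32 (s64)
  assert(!canFoldLshr(3, 29, 32)); // dont_fold_lshr: 32 is out of range on s32
  return 0;
}
```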

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
index 3e73aff2c800..c71822475786 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir
@@ -259,3 +259,144 @@ body:             |
     G_BR %bb.0
   bb.1:
     RET_ReallyLR
+
+...
+---
+name:            fold_lshr
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: fold_lshr
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   TBNZW %copy, 4, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s32) = G_CONSTANT i32 8
+    %zero:gpr(s32) = G_CONSTANT i32 0
+
+    ; We should get 4 as the test bit.
+    %fold_cst:gpr(s32) = G_CONSTANT i32 1
+    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst
+
+    %and:gpr(s32) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            fold_lshr_2
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: fold_lshr_2
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64 = COPY $x0
+  ; CHECK:   TBNZX %copy, 32, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; We're testing an s64.
+    ; 3 + 29 = 32, which is less than 64, so we can fold.
+    %fold_cst:gpr(s64) = G_CONSTANT i64 29
+    %fold_me:gpr(s64) = G_LSHR %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            dont_fold_lshr
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_lshr
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   %fold_cst:gpr32 = MOVi32imm 29
+  ; CHECK:   %fold_me:gpr32 = LSRVWr %copy, %fold_cst
+  ; CHECK:   TBNZW %fold_me, 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s32) = G_CONSTANT i32 8
+    %zero:gpr(s32) = G_CONSTANT i32 0
+
+    ; We're testing an s32.
+    ; 3 + 29 = 32, which is greater than 31, so we don't fold.
+    %fold_cst:gpr(s32) = G_CONSTANT i32 29
+    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst
+
+    %and:gpr(s32) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+
+...
+---
+name:            lshr_negative
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: lshr_negative
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr32 = COPY $w0
+  ; CHECK:   TBNZW %copy, 2, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s32) = COPY $w0
+    %bit:gpr(s32) = G_CONSTANT i32 8
+    %zero:gpr(s32) = G_CONSTANT i32 0
+
+    ; Constant becomes very large and wraps around. Since it's larger than the
+    ; bit width, that means the LSHR is poison, so we can still fold.
+    %fold_cst:gpr(s32) = G_CONSTANT i32 -1
+    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst
+
+    %and:gpr(s32) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
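
One note on `lshr_negative`: the i32 -1 shift amount appears to be read back
sign-extended, so the unsigned `Bit + *C` addition wraps around and lands back
inside the register, which matches the selected `TBNZW %copy, 2`. A standalone
sketch of that arithmetic (an assumption about the internals, not the actual
selector code):

```
// Sketch of the wraparound in the lshr_negative test: an all-ones shift
// amount makes Bit + ShiftAmt wrap modulo 2^64.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Bit = 3;                              // from the AND mask of 8
  uint64_t ShiftAmt = static_cast<uint64_t>(-1); // the G_CONSTANT i32 -1
  uint64_t Folded = Bit + ShiftAmt;              // wraps around to 2
  assert(Folded == 2 && Folded < 32);            // fold fires: TBNZW %copy, 2
  return 0;
}
```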

More information about the llvm-commits mailing list