[llvm] cd9a52b - [AArch64][GlobalISel] Fold binops on the true side of G_SELECT

Tue Dec 8 10:43:36 PST 2020

Author: Jessica Paquette
Date: 2020-12-08T10:42:59-08:00
New Revision: cd9a52b99e685e8a77dd85d25c7d1ec8b86b9f55

URL: https://github.com/llvm/llvm-project/commit/cd9a52b99e685e8a77dd85d25c7d1ec8b86b9f55
DIFF: https://github.com/llvm/llvm-project/commit/cd9a52b99e685e8a77dd85d25c7d1ec8b86b9f55.diff

LOG: [AArch64][GlobalISel] Fold binops on the true side of G_SELECT

This implements the following folds:

```
G_SELECT cc, (G_SUB 0, %x), %false -> CSNEG %x, %false, inv_cc
G_SELECT cc, (G_XOR x, -1), %false -> CSINV %x, %false, inv_cc
```

This is similar to the folds introduced in
5bc0bd05e6a8d788e08cdf3d154f3a33202aee53.

In 5bc0bd05e6a8d788e08cdf3d154f3a33202aee53 I mentioned that we may prefer to do
this in AArch64PostLegalizerLowering.

I think that it's probably better to do this in the selector. The way we select
G_SELECT depends on what register banks end up being assigned to it. If we did
this in AArch64PostLegalizerLowering, then we'd end up checking *every* G_SELECT
to see if it's worth swapping operands. Doing it in the selector allows us to
restrict the optimization to only relevant G_SELECTs.

Also fix up some comments in `TryFoldBinOpIntoSelect` which are kind of
confusing IMO.

Example IR: https://godbolt.org/z/3qPGca

Differential Revision: https://reviews.llvm.org/D92860

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 493167e284cf..c447f75681e7 100644

--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1033,31 +1033,36 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True,
   // By default, we'll try and emit a CSEL.
   unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
   bool Optimized = false;
-  auto TryFoldBinOpIntoSelect = [&Opc, &False, Is32Bit, &MRI]() {
+  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI](Register &Reg,
+                                                           bool Invert) {
     // Attempt to fold:
     //
-    // sub = G_SUB 0, x
-    // select = G_SELECT cc, true, sub
+    // %sub = G_SUB 0, %x
+    // %select = G_SELECT cc, %reg, %sub
     //
     // Into:
-    // select = CSNEG true, x, cc
+    // %select = CSNEG %reg, %x, cc
     Register MatchReg;
-    if (mi_match(False, MRI, m_Neg(m_Reg(MatchReg)))) {
+    if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
       Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
-      False = MatchReg;
+      Reg = MatchReg;
+      if (Invert)
+        CC = AArch64CC::getInvertedCondCode(CC);
       return true;
     }
 
     // Attempt to fold:
     //
-    // xor = G_XOR x, -1
-    // select = G_SELECT cc, true, xor
+    // %xor = G_XOR %x, -1
+    // %select = G_SELECT cc, %reg, %xor
     //
     // Into:
-    // select = CSINV true, x, cc
-    if (mi_match(False, MRI, m_Not(m_Reg(MatchReg)))) {
+    // %select = CSINV %reg, %x, cc
+    if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
       Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
-      False = MatchReg;
+      Reg = MatchReg;
+      if (Invert)
+        CC = AArch64CC::getInvertedCondCode(CC);
       return true;
     }
 
@@ -1139,7 +1144,8 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True,
     return false;
   };
 
-  Optimized |= TryFoldBinOpIntoSelect();
+  Optimized |= TryFoldBinOpIntoSelect(False, /*Invert = */ false);
+  Optimized |= TryFoldBinOpIntoSelect(True, /*Invert = */ true);
   Optimized |= TryOptSelectCst();
   auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
   constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
index 2d7928f906ed..d2eee15c5cb6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
@@ -376,6 +376,36 @@ body:             |
     $w0 = COPY %select(s32)
     RET_ReallyLR implicit $w0
 
+...
+---
+name:            csneg_inverted_cc
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; G_SELECT cc, (G_SUB 0, %x), %false -> CSNEG %x, %false, inv_cc
+
+    ; CHECK-LABEL: name: csneg_inverted_cc
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr32 = COPY $w1
+    ; CHECK: %f:gpr32 = COPY $w2
+    ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %reg0, 0, implicit-def $nzcv
+    ; CHECK: %select:gpr32 = CSNEGWr %reg1, %f, 0, implicit $nzcv
+    ; CHECK: $w0 = COPY %select
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %cond:gpr(s1) = G_TRUNC %reg0(s32)
+    %reg1:gpr(s32) = COPY $w1
+    %f:gpr(s32) = COPY $w2
+    %zero:gpr(s32) = G_CONSTANT i32 0
+    %sub:gpr(s32) = G_SUB %zero(s32), %reg1
+    %select:gpr(s32) = G_SELECT %cond(s1), %sub, %f
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+
 ...
 ---
 name:            csneg_s64
@@ -467,6 +497,36 @@ body:             |
     $w0 = COPY %select(s32)
     RET_ReallyLR implicit $w0
 
+...
+---
+name:            csinv_inverted_cc
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; G_SELECT cc, (G_XOR x, -1), %false -> CSINV %x, %false, inv_cc
+
+    ; CHECK-LABEL: name: csinv_inverted_cc
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr32 = COPY $w1
+    ; CHECK: %f:gpr32 = COPY $w2
+    ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %reg0, 0, implicit-def $nzcv
+    ; CHECK: %select:gpr32 = CSINVWr %reg1, %f, 0, implicit $nzcv
+    ; CHECK: $w0 = COPY %select
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s32) = COPY $w1
+    %cond:gpr(s1) = G_TRUNC %reg0(s32)
+    %f:gpr(s32) = COPY $w2
+    %negative_one:gpr(s32) = G_CONSTANT i32 -1
+    %xor:gpr(s32) = G_XOR %reg1(s32), %negative_one
+    %select:gpr(s32) = G_SELECT %cond(s1), %xor, %f
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+
 ...
 ---
 name:            csinv_s64