[llvm] e0012c5 - [TargetLowering][DAGCombine][MSP430] Shift Amount Threshold in DAGCombine (3)

Mon Nov 11 01:19:39 PST 2019

Author: joanlluch
Date: 2019-11-11T10:18:25+01:00
New Revision: e0012c5d6acb568c77c7a6b845637f330e64515f

URL: https://github.com/llvm/llvm-project/commit/e0012c5d6acb568c77c7a6b845637f330e64515f
DIFF: https://github.com/llvm/llvm-project/commit/e0012c5d6acb568c77c7a6b845637f330e64515f.diff

LOG: [TargetLowering][DAGCombine][MSP430] Shift Amount Threshold in DAGCombine (3)

Summary:
Additional filtering of undesired shifts for targets that do not support them efficiently.

Related to D69116 and D69120.

Applies the TLI.getShiftAmountThreshold hook to prevent undesired generation of shifts for the following IR code:

```
define i16 @testShiftBits(i16 %a) {
entry:
  %and = and i16 %a, -64
  %cmp = icmp eq i16 %and, 64
  %conv = zext i1 %cmp to i16
  ret i16 %conv
}

define i16 @testShiftBits_11(i16 %a) {
entry:
  %cmp = icmp ugt i16 %a, 63
  %conv = zext i1 %cmp to i16
  ret i16 %conv
}

define i16 @testShiftBits_12(i16 %a) {
entry:
  %cmp = icmp ult i16 %a, 64
  %conv = zext i1 %cmp to i16
  ret i16 %conv
}
```
The attached diff file shows the piece of code in TargetLowering that is responsible for the generation of shifts in relation to the IR above.

Before applying this patch, shifts will be generated to replace non-legal icmp immediates. However, shifts may be undesired if they are even more expensive for the target.

For all my previous patches in this series (cited above) I added test cases for the MSP430 target. However, in this case, the target is not suitable for showing improvements related to this patch, because the MSP430 does not implement "isLegalICmpImmediate". The default implementation always returns true, therefore the patched code in TargetLowering is never reached for that target. Targets implementing both "isLegalICmpImmediate" and "getShiftAmountThreshold" will benefit from this.

The differential effect of this patch can only be shown for the MSP430 by temporarily implementing "isLegalICmpImmediate" to return false for large immediates. This is simulated with the implementation of a command-line flag that was incorporated in D69975.

This patch belongs to an initiative to "relax" the generation of shifts by LLVM for targets requiring it.

Reviewers: spatel, lebedev.ri, asl

Reviewed By: spatel

Subscribers: lenary, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69326

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/MSP430/shift-amount-threshold-b.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f42581da82d9..96894613b4a5 100644

--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3655,11 +3655,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           const APInt &AndRHSC = AndRHS->getAPIntValue();
           if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
             unsigned ShiftBits = AndRHSC.countTrailingZeros();
-            SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
-                                        DAG.getConstant(ShiftBits, dl,
-                                                        ShiftTy));
-            SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
-            return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+            if (ShiftBits <= TLI.getShiftAmountThreshold(ShValTy)) {
+              SDValue Shift =
+                DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
+                            DAG.getConstant(ShiftBits, dl, ShiftTy));
+              SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
+              return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+            }
           }
         }
       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
@@ -3681,7 +3683,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         }
         NewC.lshrInPlace(ShiftBits);
         if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
-            isLegalICmpImmediate(NewC.getSExtValue())) {
+            isLegalICmpImmediate(NewC.getSExtValue()) &&
+            ShiftBits <= TLI.getShiftAmountThreshold(ShValTy)) {
           SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                                       DAG.getConstant(ShiftBits, dl, ShiftTy));
           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);

diff  --git a/llvm/test/CodeGen/MSP430/shift-amount-threshold-b.ll b/llvm/test/CodeGen/MSP430/shift-amount-threshold-b.ll
index 9022c33b3847..98efb30d8516 100644
--- a/llvm/test/CodeGen/MSP430/shift-amount-threshold-b.ll
+++ b/llvm/test/CodeGen/MSP430/shift-amount-threshold-b.ll
@@ -6,14 +6,8 @@
 define i16 @testSimplifySetCC_2(i16 %x) {
 ; CHECK-LABEL: testSimplifySetCC_2:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    clrc
-; CHECK-NEXT:    rrc r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    cmp #1, r12
+; CHECK-NEXT:    and #-64, r12
+; CHECK-NEXT:    cmp #64, r12
 ; CHECK-NEXT:    mov r2, r12
 ; CHECK-NEXT:    rra r12
 ; CHECK-NEXT:    and #1, r12
@@ -30,18 +24,9 @@ entry:
 define i16 @testSimplifySetCC_3(i16 %x) {
 ; CHECK-LABEL: testSimplifySetCC_3:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    clrc
-; CHECK-NEXT:    rrc r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    tst r12
-; CHECK-NEXT:    mov r2, r13
-; CHECK-NEXT:    rra r13
-; CHECK-NEXT:    mov #1, r12
-; CHECK-NEXT:    bic r13, r12
+; CHECK-NEXT:    cmp #64, r12
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    and #1, r12
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp ugt i16 %x, 63
@@ -54,17 +39,9 @@ entry:
 define i16 @testSimplifySetCC_4(i16 %x) {
 ; CHECK-LABEL: testSimplifySetCC_4:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    clrc
-; CHECK-NEXT:    rrc r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    tst r12
-; CHECK-NEXT:    mov r2, r12
-; CHECK-NEXT:    rra r12
-; CHECK-NEXT:    and #1, r12
+; CHECK-NEXT:    cmp #64, r12
+; CHECK-NEXT:    mov #1, r12
+; CHECK-NEXT:    bic r2, r12
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp ult i16 %x, 64