[llvm] r339472 - [ARM] Adjust AND immediates to make them cheaper to select.

Eli Friedman via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 10 14:21:54 PDT 2018


Author: efriedma
Date: Fri Aug 10 14:21:53 2018
New Revision: 339472

URL: http://llvm.org/viewvc/llvm-project?rev=339472&view=rev
Log:
[ARM] Adjust AND immediates to make them cheaper to select.

LLVM normally prefers to minimize the number of bits set in an AND
immediate, but that doesn't always match the available ARM instructions.
In Thumb1 mode, prefer uxtb or uxth where possible; otherwise, prefer
a two-instruction sequence movs+ands or movs+bics.

Some potential improvements are outlined in
ARMTargetLowering::targetShrinkDemandedConstant, but the current
heuristics seem to work pretty well already.

The ARMISelDAGToDAG fix ensures we don't generate an invalid UBFX
instruction due to a larger-than-expected mask. (It's orthogonal, in
some sense, but as far as I can tell it's either impossible or nearly
impossible to reproduce the bug without this change.)

According to my testing, this seems to consistently improve codesize by
a small amount by forming bic more often for ISD::AND with an immediate.

Differential Revision: https://reviews.llvm.org/D50030


Modified:
    llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.h
    llvm/trunk/test/CodeGen/ARM/Windows/alloca.ll
    llvm/trunk/test/CodeGen/ARM/Windows/vla.ll
    llvm/trunk/test/CodeGen/ARM/and-cmpz.ll
    llvm/trunk/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
    llvm/trunk/test/CodeGen/ARM/select_const.ll
    llvm/trunk/test/CodeGen/Thumb/bic_imm.ll
    llvm/trunk/test/CodeGen/Thumb/shift-and.ll

Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=339472&r1=339471&r2=339472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Fri Aug 10 14:21:53 2018
@@ -2309,6 +2309,11 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExt
                                 Srl_imm)) {
         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
 
+        // Mask off the unnecessary bits of the AND immediate; normally
+        // DAGCombine will do this, but that might not happen if
+        // targetShrinkDemandedConstant chooses a different immediate.
+        And_imm &= -1U >> Srl_imm;
+
         // Note: The width operand is encoded as width-1.
         unsigned Width = countTrailingOnes(And_imm) - 1;
         unsigned LSB = Srl_imm;

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=339472&r1=339471&r2=339472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri Aug 10 14:21:53 2018
@@ -13602,6 +13602,83 @@ void ARMTargetLowering::computeKnownBits
   }
 }
 
+bool
+ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
+                                                const APInt &DemandedAPInt,
+                                                TargetLoweringOpt &TLO) const {
+  // Delay optimization, so we don't have to deal with illegal types, or block
+  // optimizations.
+  if (!TLO.LegalOps)
+    return false;
+
+  // Only optimize AND for now.
+  if (Op.getOpcode() != ISD::AND)
+    return false;
+
+  EVT VT = Op.getValueType();
+
+  // Ignore vectors.
+  if (VT.isVector())
+    return false;
+
+  assert(VT == MVT::i32 && "Unexpected integer type");
+
+  // Make sure the RHS really is a constant.
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  if (!C)
+    return false;
+
+  unsigned Mask = C->getZExtValue();
+
+  // If mask is zero, nothing to do.
+  if (!Mask)
+    return false;
+
+  unsigned Demanded = DemandedAPInt.getZExtValue();
+  unsigned ShrunkMask = Mask & Demanded;
+  unsigned ExpandedMask = Mask | ~Demanded;
+
+  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
+    return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
+  };
+  auto UseMask = [this, Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
+    if (NewMask == Mask)
+      return true;
+    SDLoc DL(Op);
+    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+    return TLO.CombineTo(Op, NewOp);
+  };
+
+  // Prefer uxtb mask.
+  if (IsLegalMask(0xFF))
+    return UseMask(0xFF);
+
+  // Prefer uxth mask.
+  if (IsLegalMask(0xFFFF))
+    return UseMask(0xFFFF);
+
+  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
+  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+  if (ShrunkMask < 256)
+    return UseMask(ShrunkMask);
+
+  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
+  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
+    return UseMask(ExpandedMask);
+
+  // Potential improvements:
+  //
+  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
+  // We could try to prefer Thumb1 immediates which can be lowered to a
+  // two-instruction sequence.
+  // We could try to recognize more legal ARM/Thumb2 immediates here.
+
+  return false;
+}
+
+
 //===----------------------------------------------------------------------===//
 //                           ARM Inline Assembly Support
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=339472&r1=339471&r2=339472&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Fri Aug 10 14:21:53 2018
@@ -389,6 +389,9 @@ class VectorType;
                                        const SelectionDAG &DAG,
                                        unsigned Depth) const override;
 
+    bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+                                      TargetLoweringOpt &TLO) const override;
+
 
     bool ExpandInlineAsm(CallInst *CI) const override;
 

Modified: llvm/trunk/test/CodeGen/ARM/Windows/alloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/Windows/alloca.ll?rev=339472&r1=339471&r2=339472&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/Windows/alloca.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/Windows/alloca.ll Fri Aug 10 14:21:53 2018
@@ -19,7 +19,7 @@ entry:
 ; because we do not have the kill flag on R0.
 ; CHECK: mov.w [[R1:lr]], #7
 ; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2
-; CHECK: bic [[R0]], [[R0]], #7
+; CHECK: bic [[R0]], [[R0]], #4
 ; CHECK: lsrs r4, [[R0]], #2
 ; CHECK: bl __chkstk
 ; CHECK: sub.w sp, sp, r4

Modified: llvm/trunk/test/CodeGen/ARM/Windows/vla.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/Windows/vla.ll?rev=339472&r1=339471&r2=339472&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/Windows/vla.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/Windows/vla.ll Fri Aug 10 14:21:53 2018
@@ -14,13 +14,13 @@ entry:
 }
 
 ; CHECK-SMALL-CODE:   adds [[R4:r[0-9]+]], #7
-; CHECK-SMALL-CODE:   bic [[R4]], [[R4]], #7
+; CHECK-SMALL-CODE:   bic [[R4]], [[R4]], #4
 ; CHECK-SMALL-CODE:   lsrs r4, [[R4]], #2
 ; CHECK-SMALL-CODE:   bl __chkstk
 ; CHECK-SMALL-CODE:   sub.w sp, sp, r4
 
 ; CHECK-LARGE-CODE:   adds  [[R4:r[0-9]+]], #7
-; CHECK-LARGE-CODE:   bic   [[R4]], [[R4]], #7
+; CHECK-LARGE-CODE:   bic   [[R4]], [[R4]], #4
 ; CHECK-LARGE-CODE:   lsrs  r4, [[R4]], #2
 ; CHECK-LARGE-CODE:   movw  [[IP:r[0-9]+]], :lower16:__chkstk
 ; CHECK-LARGE-CODE:   movt  [[IP]], :upper16:__chkstk

Modified: llvm/trunk/test/CodeGen/ARM/and-cmpz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/and-cmpz.ll?rev=339472&r1=339471&r2=339472&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/and-cmpz.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/and-cmpz.ll Fri Aug 10 14:21:53 2018
@@ -89,11 +89,10 @@ false:
 }
 
 ; CHECK-LABEL: i16_cmpz:
-; T1:      movs    r2, #127
-; T1-NEXT: lsls    r2, r2, #9
-; T1-NEXT: ands    r2, r0
-; T1-NEXT: lsrs    r0, r2, #9
-; T2:      and     r0, r0, #65024
+; T1:      uxth    r0, r0
+; T1-NEXT: lsrs    r0, r0, #9
+; T1-NEXT: bne
+; T2:      uxth    r0, r0
 ; T2-NEXT: movs    r2, #0
 ; T2-NEXT: cmp.w   r2, r0, lsr #9
 define void @i16_cmpz(i16 %x, void (i32)* %foo) {

Modified: llvm/trunk/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/illegal-bitfield-loadstore.ll?rev=339472&r1=339471&r2=339472&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/illegal-bitfield-loadstore.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/illegal-bitfield-loadstore.ll Fri Aug 10 14:21:53 2018
@@ -30,10 +30,8 @@ define void @i24_and_or(i24* %a) {
 ; LE-LABEL: i24_and_or:
 ; LE:       @ %bb.0:
 ; LE-NEXT:    ldrh r1, [r0]
-; LE-NEXT:    mov r2, #16256
-; LE-NEXT:    orr r2, r2, #49152
 ; LE-NEXT:    orr r1, r1, #384
-; LE-NEXT:    and r1, r1, r2
+; LE-NEXT:    bic r1, r1, #127
 ; LE-NEXT:    strh r1, [r0]
 ; LE-NEXT:    mov pc, lr
 ;

Modified: llvm/trunk/test/CodeGen/ARM/select_const.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/select_const.ll?rev=339472&r1=339471&r2=339472&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/select_const.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/select_const.ll Fri Aug 10 14:21:53 2018
@@ -314,9 +314,8 @@ define i64 @opaque_constant2(i1 %cond, i
 ; CHECK-NEXT:    mov r1, #1
 ; CHECK-NEXT:    tst r0, #1
 ; CHECK-NEXT:    orr r1, r1, #65536
-; CHECK-NEXT:    mov r0, r1
-; CHECK-NEXT:    moveq r0, #23
-; CHECK-NEXT:    and r0, r0, r1
+; CHECK-NEXT:    moveq r1, #23
+; CHECK-NEXT:    bic r0, r1, #22
 ; CHECK-NEXT:    mov r1, #0
 ; CHECK-NEXT:    mov pc, lr
   %sel = select i1 %cond, i64 65537, i64 23

Modified: llvm/trunk/test/CodeGen/Thumb/bic_imm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/bic_imm.ll?rev=339472&r1=339471&r2=339472&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/bic_imm.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/bic_imm.ll Fri Aug 10 14:21:53 2018
@@ -39,22 +39,77 @@ entry:
 define void @truncated(i16 %a, i16* %p) {
 ; CHECK-T1-LABEL: truncated:
 ; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    ldr r2, .LCPI2_0
+; CHECK-T1-NEXT:    movs r2, #128
+; CHECK-T1-NEXT:    bics r0, r2
+; CHECK-T1-NEXT:    strh r0, [r1]
+; CHECK-T1-NEXT:    bx lr
+;
+; CHECK-T2-LABEL: truncated:
+; CHECK-T2:       @ %bb.0:
+; CHECK-T2-NEXT:    bic r0, r0, #128
+; CHECK-T2-NEXT:    strh r0, [r1]
+; CHECK-T2-NEXT:    bx lr
+  %and = and i16 %a, -129
+  store i16 %and, i16* %p
+  ret void
+}
+
+define void @truncated_neg2(i16 %a, i16* %p) {
+; CHECK-T1-LABEL: truncated_neg2:
+; CHECK-T1:       @ %bb.0:
+; CHECK-T1-NEXT:    movs r2, #1
+; CHECK-T1-NEXT:    bics r0, r2
+; CHECK-T1-NEXT:    strh r0, [r1]
+; CHECK-T1-NEXT:    bx lr
+;
+; CHECK-T2-LABEL: truncated_neg2:
+; CHECK-T2:       @ %bb.0:
+; CHECK-T2-NEXT:    bic r0, r0, #1
+; CHECK-T2-NEXT:    strh r0, [r1]
+; CHECK-T2-NEXT:    bx lr
+  %and = and i16 %a, -2
+  store i16 %and, i16* %p
+  ret void
+}
+
+define void @truncated_neg256(i16 %a, i16* %p) {
+; CHECK-T1-LABEL: truncated_neg256:
+; CHECK-T1:       @ %bb.0:
+; CHECK-T1-NEXT:    movs r2, #255
+; CHECK-T1-NEXT:    bics r0, r2
+; CHECK-T1-NEXT:    strh r0, [r1]
+; CHECK-T1-NEXT:    bx lr
+;
+; CHECK-T2-LABEL: truncated_neg256:
+; CHECK-T2:       @ %bb.0:
+; CHECK-T2-NEXT:    bic r0, r0, #255
+; CHECK-T2-NEXT:    strh r0, [r1]
+; CHECK-T2-NEXT:    bx lr
+  %and = and i16 %a, -256
+  store i16 %and, i16* %p
+  ret void
+}
+
+; FIXME: Thumb2 supports "bic r0, r0, #510"
+define void @truncated_neg511(i16 %a, i16* %p) {
+; CHECK-T1-LABEL: truncated_neg511:
+; CHECK-T1:       @ %bb.0:
+; CHECK-T1-NEXT:    ldr r2, .LCPI5_0
 ; CHECK-T1-NEXT:    ands r2, r0
 ; CHECK-T1-NEXT:    strh r2, [r1]
 ; CHECK-T1-NEXT:    bx lr
 ; CHECK-T1-NEXT:    .p2align 2
 ; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:  .LCPI2_0:
-; CHECK-T1-NEXT:    .long 65407 @ 0xff7f
+; CHECK-T1-NEXT:  .LCPI5_0:
+; CHECK-T1-NEXT:    .long 65025 @ 0xfe01
 ;
-; CHECK-T2-LABEL: truncated:
+; CHECK-T2-LABEL: truncated_neg511:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    movw r2, #65407
+; CHECK-T2-NEXT:    movw r2, #65025
 ; CHECK-T2-NEXT:    ands r0, r2
 ; CHECK-T2-NEXT:    strh r0, [r1]
 ; CHECK-T2-NEXT:    bx lr
-  %and = and i16 %a, -129
+  %and = and i16 %a, -511
   store i16 %and, i16* %p
   ret void
 }

Modified: llvm/trunk/test/CodeGen/Thumb/shift-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/shift-and.ll?rev=339472&r1=339471&r2=339472&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/shift-and.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/shift-and.ll Fri Aug 10 14:21:53 2018
@@ -46,7 +46,7 @@ define i32 @test4(i32 %x) {
 ; CHECK-LABEL: test4:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    lsls r0, r0, #4
-; CHECK-NEXT:    movs r1, #127
+; CHECK-NEXT:    movs r1, #112
 ; CHECK-NEXT:    bics r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
@@ -110,16 +110,24 @@ entry:
 define i32 @test9(i32 %x) {
 ; CHECK-LABEL: test9:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    lsrs r1, r0, #2
-; CHECK-NEXT:    ldr r0, .LCPI8_0
-; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    lsrs r0, r0, #2
+; CHECK-NEXT:    movs r1, #1
+; CHECK-NEXT:    bics r0, r1
 ; CHECK-NEXT:    bx lr
-; CHECK-NEXT:    .p2align 2
-; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI8_0:
-; CHECK-NEXT:    .long 1073741822 @ 0x3ffffffe
 entry:
   %and = lshr i32 %x, 2
   %shr = and i32 %and, 1073741822
   ret i32 %shr
 }
+
+define i32 @test10(i32 %x) {
+; CHECK-LABEL: test10:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsls r0, r0, #2
+; CHECK-NEXT:    uxtb r0, r0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl i32 %x, 2
+  %shr = and i32 %0, 255
+  ret i32 %shr
+}




More information about the llvm-commits mailing list