[llvm] r349857 - [ARM] Complete the Thumb1 shift+and->shift+shift transforms.
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 20 15:39:55 PST 2018
Author: efriedma
Date: Thu Dec 20 15:39:54 2018
New Revision: 349857
URL: http://llvm.org/viewvc/llvm-project?rev=349857&view=rev
Log:
[ARM] Complete the Thumb1 shift+and->shift+shift transforms.
This saves materializing the immediate. The additional forms are less
common (they don't usually show up for bitfield insert/extract), but
they're still relevant.
I had to add a new target hook to prevent DAGCombine from reversing the
transform. That isn't the only possible way to solve the conflict, but
it seems straightforward enough.
Differential Revision: https://reviews.llvm.org/D55630
Modified:
llvm/trunk/include/llvm/CodeGen/TargetLowering.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
llvm/trunk/test/CodeGen/Thumb/shift-and.ll
Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=349857&r1=349856&r2=349857&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Thu Dec 20 15:39:54 2018
@@ -3090,6 +3090,15 @@ public:
return true;
}
+ /// Return true if it is profitable to fold a pair of shifts into a mask.
+ /// This is usually true on most targets. But some targets, like Thumb1,
+ /// have immediate shift instructions, but no immediate "and" instruction;
+ /// this makes the fold unprofitable.
+ virtual bool shouldFoldShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const {
+ return true;
+ }
+
// Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern
// to a shuffle and a truncate.
// Example of such a combine:
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=349857&r1=349856&r2=349857&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Dec 20 15:39:54 2018
@@ -6518,7 +6518,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N)
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
+ TLI.shouldFoldShiftPairToMask(N, Level)) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N0C1->getZExtValue();
if (c1 < OpSizeInBits) {
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=349857&r1=349856&r2=349857&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu Dec 20 15:39:54 2018
@@ -10432,6 +10432,18 @@ ARMTargetLowering::isDesirableToCommuteW
return false;
}
+bool
+ARMTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const {
+ if (!Subtarget->isThumb1Only())
+ return true;
+
+ if (Level == BeforeLegalizeTypes)
+ return true;
+
+ return false;
+}
+
static SDValue PerformSHLSimplify(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *ST) {
@@ -10735,6 +10747,12 @@ static SDValue CombineANDShift(SDNode *N
if (!C2 || C2 >= 32)
return SDValue();
+ // Clear irrelevant bits in the mask.
+ if (LeftShift)
+ C1 &= (-1U << C2);
+ else
+ C1 &= (-1U >> C2);
+
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
@@ -10742,9 +10760,7 @@ static SDValue CombineANDShift(SDNode *N
// "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
// transform to a pair of shifts, to save materializing c1.
- // First pattern: right shift, and c1+1 is a power of two.
- // FIXME: Also check reversed pattern (left shift, and ~c1+1 is a power
- // of two).
+ // First pattern: right shift, then mask off leading bits.
// FIXME: Use demanded bits?
if (!LeftShift && isMask_32(C1)) {
uint32_t C3 = countLeadingZeros(C1);
@@ -10756,13 +10772,23 @@ static SDValue CombineANDShift(SDNode *N
}
}
- // Second pattern: left shift, and (c1>>c2)+1 is a power of two.
- // FIXME: Also check reversed pattern (right shift, and ~(c1<<c2)+1
- // is a power of two).
+ // First pattern, reversed: left shift, then mask off trailing bits.
+ if (LeftShift && isMask_32(~C1)) {
+ uint32_t C3 = countTrailingZeros(C1);
+ if (C2 < C3) {
+ SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
+ DAG.getConstant(C3 - C2, DL, MVT::i32));
+ return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
+ DAG.getConstant(C3, DL, MVT::i32));
+ }
+ }
+
+ // Second pattern: left shift, then mask off leading bits.
// FIXME: Use demanded bits?
if (LeftShift && isShiftedMask_32(C1)) {
+ uint32_t Trailing = countTrailingZeros(C1);
uint32_t C3 = countLeadingZeros(C1);
- if (C2 + C3 < 32 && C1 == ((-1U << (C2 + C3)) >> C3)) {
+ if (Trailing == C2 && C2 + C3 < 32) {
SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
DAG.getConstant(C2 + C3, DL, MVT::i32));
return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
@@ -10770,6 +10796,19 @@ static SDValue CombineANDShift(SDNode *N
}
}
+ // Second pattern, reversed: right shift, then mask off trailing bits.
+ // FIXME: Handle other patterns of known/demanded bits.
+ if (!LeftShift && isShiftedMask_32(C1)) {
+ uint32_t Leading = countLeadingZeros(C1);
+ uint32_t C3 = countTrailingZeros(C1);
+ if (Leading == C2 && C2 + C3 < 32) {
+ SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
+ DAG.getConstant(C2 + C3, DL, MVT::i32));
+ return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
+ DAG.getConstant(C3, DL, MVT::i32));
+ }
+ }
+
// FIXME: Transform "(and (shl x, c2) c1)" ->
// "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
// c1.
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=349857&r1=349856&r2=349857&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Thu Dec 20 15:39:54 2018
@@ -593,6 +593,8 @@ class VectorType;
bool isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const override;
+ bool shouldFoldShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const override;
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
Modified: llvm/trunk/test/CodeGen/Thumb/shift-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/shift-and.ll?rev=349857&r1=349856&r2=349857&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/shift-and.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/shift-and.ll Thu Dec 20 15:39:54 2018
@@ -45,9 +45,8 @@ entry:
define i32 @test4(i32 %x) {
; CHECK-LABEL: test4:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: lsls r0, r0, #4
-; CHECK-NEXT: movs r1, #112
-; CHECK-NEXT: bics r0, r1
+; CHECK-NEXT: lsrs r0, r0, #3
+; CHECK-NEXT: lsls r0, r0, #7
; CHECK-NEXT: bx lr
entry:
%0 = shl i32 %x, 4
@@ -84,9 +83,8 @@ entry:
define i32 @test7(i32 %x) {
; CHECK-LABEL: test7:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: lsrs r1, r0, #29
-; CHECK-NEXT: movs r0, #4
-; CHECK-NEXT: ands r0, r1
+; CHECK-NEXT: lsrs r0, r0, #31
+; CHECK-NEXT: lsls r0, r0, #2
; CHECK-NEXT: bx lr
entry:
%0 = lshr i32 %x, 29
@@ -110,9 +108,8 @@ entry:
define i32 @test9(i32 %x) {
; CHECK-LABEL: test9:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: lsrs r0, r0, #2
-; CHECK-NEXT: movs r1, #1
-; CHECK-NEXT: bics r0, r1
+; CHECK-NEXT: lsrs r0, r0, #3
+; CHECK-NEXT: lsls r0, r0, #1
; CHECK-NEXT: bx lr
entry:
%and = lshr i32 %x, 2
@@ -131,3 +128,63 @@ entry:
%shr = and i32 %0, 255
ret i32 %shr
}
+
+define i32 @test11(i32 %x) {
+; CHECK-LABEL: test11:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: lsrs r0, r0, #24
+; CHECK-NEXT: lsls r0, r0, #2
+; CHECK-NEXT: bx lr
+entry:
+ %shl = lshr i32 %x, 22
+ %and = and i32 %shl, 1020
+ ret i32 %and
+}
+
+define i32 @test12(i32 %x) {
+; CHECK-LABEL: test12:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: lsrs r0, r0, #3
+; CHECK-NEXT: lsls r0, r0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = shl i32 %x, 1
+ %shr = and i32 %0, -16
+ ret i32 %shr
+}
+
+define i32 @test13(i32 %x) {
+; CHECK-LABEL: test13:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: lsrs r0, r0, #3
+; CHECK-NEXT: lsls r0, r0, #4
+; CHECK-NEXT: bx lr
+entry:
+ %shr = lshr i32 %x, 3
+ %shl = shl i32 %shr, 4
+ ret i32 %shl
+}
+
+define i32 @test14(i32 %x) {
+; CHECK-LABEL: test14:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: lsrs r0, r0, #6
+; CHECK-NEXT: lsls r0, r0, #10
+; CHECK-NEXT: bx lr
+entry:
+ %shl = shl i32 %x, 4
+ %and = and i32 %shl, -1024
+ ret i32 %and
+}
+
+define i32 @test15(i32 %x) {
+; CHECK-LABEL: test15:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: lsrs r0, r0, #4
+; CHECK-NEXT: lsls r0, r0, #3
+; CHECK-NEXT: bx lr
+entry:
+ %shr = lshr i32 %x, 4
+ %shl = shl i32 %shr, 3
+ ret i32 %shl
+}
More information about the llvm-commits
mailing list