[llvm] r333214 - [AArch64] Take advantage of variable shift/rotate amount implicit mod operation.
Geoff Berry via llvm-commits
llvm-commits at lists.llvm.org
Thu May 24 11:29:42 PDT 2018
Author: gberry
Date: Thu May 24 11:29:42 2018
New Revision: 333214
URL: http://llvm.org/viewvc/llvm-project?rev=333214&view=rev
Log:
[AArch64] Take advantage of variable shift/rotate amount implicit mod operation.
Summary:
Optimize the code generated for variable shifts and rotates by taking
advantage of the implicit AND/modulo operation that AArch64's variable
shift instructions perform on the shift-amount register.
Resolves bug 27582 and bug 37421.
Reviewers: t.p.northover, qcolombet, MatzeB, javed.absar
Subscribers: rengolin, kristof.beyls, mcrosier, llvm-commits
Differential Revision: https://reviews.llvm.org/D46844
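A minimal sketch of the transformation, as illustrative C++ rather than
anything taken from the patch itself: AArch64's variable shift and rotate
instructions (LSLV, LSRV, ASRV, RORV) use only the low 5 bits (W form) or
6 bits (X form) of the shift-amount register, so explicit masks and
multiple-of-64 adjustments of the amount are redundant.

  #include <cstdint>

  uint64_t shr_masked(uint64_t x, uint64_t s) {
    // Before: and x8, x1, #0x3f ; lsr x0, x0, x8
    // After:  lsr x0, x0, x1   (LSRV already ignores bits [63:6] of x1)
    return x >> (s & 63);
  }

  uint64_t shr_reversed(uint64_t x, uint64_t s) {
    // Shift by (64 - s): 64 is congruent to 0 mod 64, so only the
    // negation survives: neg x8, x1 ; lsr x0, x0, x8
    return x >> ((64 - s) & 63);
  }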
Added:
llvm/trunk/test/CodeGen/AArch64/shift-mod.ll
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-long-shift.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp?rev=333214&r1=333213&r2=333214&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp Thu May 24 11:29:42 2018
@@ -168,6 +168,7 @@ public:
bool tryBitfieldExtractOpFromSExt(SDNode *N);
bool tryBitfieldInsertOp(SDNode *N);
bool tryBitfieldInsertInZeroOp(SDNode *N);
+ bool tryShiftAmountMod(SDNode *N);
bool tryReadRegister(SDNode *N);
bool tryWriteRegister(SDNode *N);
@@ -2441,6 +2442,111 @@ bool AArch64DAGToDAGISel::tryBitfieldIns
return true;
}
+/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
+/// variable shift/rotate instructions.
+bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ case ISD::ROTR:
+ Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
+ break;
+ case ISD::SHL:
+ Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
+ break;
+ case ISD::SRL:
+ Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
+ break;
+ case ISD::SRA:
+ Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
+ break;
+ default:
+ return false;
+ }
+
+ uint64_t Size;
+ uint64_t Bits;
+ if (VT == MVT::i32) {
+ Bits = 5;
+ Size = 32;
+ } else if (VT == MVT::i64) {
+ Bits = 6;
+ Size = 64;
+ } else
+ return false;
+
+ SDValue ShiftAmt = N->getOperand(1);
+ SDLoc DL(N);
+ SDValue NewShiftAmt;
+
+ // Skip over an extend of the shift amount.
+ if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
+ ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
+ ShiftAmt = ShiftAmt->getOperand(0);
+
+ if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
+ SDValue Add0 = ShiftAmt->getOperand(0);
+ SDValue Add1 = ShiftAmt->getOperand(1);
+ uint64_t Add0Imm;
+ uint64_t Add1Imm;
+ // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
+ // to avoid the ADD/SUB.
+ if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
+ NewShiftAmt = Add0;
+ // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
+ // generate a NEG instead of a SUB of a constant.
+ else if (ShiftAmt->getOpcode() == ISD::SUB &&
+ isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
+ (Add0Imm % Size == 0)) {
+ unsigned NegOpc;
+ unsigned ZeroReg;
+ EVT SubVT = ShiftAmt->getValueType(0);
+ if (SubVT == MVT::i32) {
+ NegOpc = AArch64::SUBWrr;
+ ZeroReg = AArch64::WZR;
+ } else {
+ assert(SubVT == MVT::i64);
+ NegOpc = AArch64::SUBXrr;
+ ZeroReg = AArch64::XZR;
+ }
+ SDValue Zero =
+ CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
+ MachineSDNode *Neg =
+ CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
+ NewShiftAmt = SDValue(Neg, 0);
+ } else
+ return false;
+ } else {
+ // If the shift amount is masked with an AND, check that the mask covers the
+ // bits that are implicitly ANDed off by the above opcodes and if so, skip
+ // the AND.
+ uint64_t MaskImm;
+ if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
+ return false;
+
+ if (countTrailingOnes(MaskImm) < Bits)
+ return false;
+
+ NewShiftAmt = ShiftAmt->getOperand(0);
+ }
+
+ // Narrow/widen the shift amount to match the size of the shift operation.
+ if (VT == MVT::i32)
+ NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
+ else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
+ MachineSDNode *Ext = CurDAG->getMachineNode(
+ AArch64::SUBREG_TO_REG, DL, VT,
+ CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
+ NewShiftAmt = SDValue(Ext, 0);
+ }
+
+ SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
+ CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+ return true;
+}
+
bool
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
unsigned RegWidth) {
@@ -2707,6 +2813,11 @@ void AArch64DAGToDAGISel::Select(SDNode
return;
if (tryBitfieldInsertInZeroOp(Node))
return;
+ LLVM_FALLTHROUGH;
+ case ISD::ROTR:
+ case ISD::SHL:
+ if (tryShiftAmountMod(Node))
+ return;
break;
case ISD::SIGN_EXTEND:
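All three patterns handled by tryShiftAmountMod rest on the same
congruence: a variable 64-bit shift sees its amount mod 64 (mod 32 for W
registers), so adding a multiple of 64, folding (64 - x) into a NEG, or
dropping a mask with at least six trailing ones cannot change the result.
A self-contained check of that claim in plain C++ (not LLVM code):

  #include <cassert>
  #include <cstdint>

  // Models LSRV Xd, Xn, Xm: the hardware reduces the amount mod 64.
  uint64_t lsrv_model(uint64_t x, uint64_t amt) { return x >> (amt % 64); }

  int main() {
    const uint64_t x = 0xDEADBEEFCAFEF00Dull;
    for (uint64_t s = 0; s < 256; ++s) {
      assert(lsrv_model(x, s) == lsrv_model(x, s + 64));  // drop ADD of 64
      assert(lsrv_model(x, s) == lsrv_model(x, s & 63));  // drop AND mask
      assert(lsrv_model(x, 64 - s) == lsrv_model(x, -s)); // SUB -> NEG
    }
    return 0;
  }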
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-long-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-long-shift.ll?rev=333214&r1=333213&r2=333214&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-long-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-long-shift.ll Thu May 24 11:29:42 2018
@@ -2,19 +2,17 @@
define i128 @shl(i128 %r, i128 %s) nounwind readnone {
; CHECK-LABEL: shl:
-; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
-; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
+; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
; CHECK: lsr [[LO_FOR_HI_NORMAL:x[0-9]+]], x0, [[REV_SHIFT]]
; CHECK: cmp x2, #0
; CHECK: csel [[LO_FOR_HI:x[0-9]+]], xzr, [[LO_FOR_HI_NORMAL]], eq
; CHECK: lsl [[HI_FOR_HI:x[0-9]+]], x1, x2
; CHECK: orr [[HI_NORMAL:x[0-9]+]], [[LO_FOR_HI]], [[HI_FOR_HI]]
+; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, x2
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
-; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, [[EXTRA_SHIFT]]
; CHECK: cmp [[EXTRA_SHIFT]], #0
; CHECK: csel x1, [[HI_BIG_SHIFT]], [[HI_NORMAL]], ge
-; CHECK: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
-; CHECK: csel x0, xzr, [[SMALLSHIFT_LO]], ge
+; CHECK: csel x0, xzr, [[HI_BIG_SHIFT]], ge
; CHECK: ret
%shl = shl i128 %r, %s
@@ -23,20 +21,18 @@ define i128 @shl(i128 %r, i128 %s) nounw
define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
; CHECK-LABEL: ashr:
-; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
-; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
+; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
; CHECK: cmp x2, #0
; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
+; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, x2
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
-; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
; CHECK: cmp [[EXTRA_SHIFT]], #0
; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
-; CHECK: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
; CHECK: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
-; CHECK: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
+; CHECK: csel x1, [[BIGSHIFT_HI]], [[LO_BIG_SHIFT]], ge
; CHECK: ret
%shr = ashr i128 %r, %s
@@ -45,19 +41,16 @@ define i128 @ashr(i128 %r, i128 %s) noun
define i128 @lshr(i128 %r, i128 %s) nounwind readnone {
; CHECK-LABEL: lshr:
-; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
-; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
+; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
; CHECK: cmp x2, #0
; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
-; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
-; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
+; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, x2
; CHECK: cmp [[EXTRA_SHIFT]], #0
; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
-; CHECK: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
-; CHECK: csel x1, xzr, [[SMALLSHIFT_HI]], ge
+; CHECK: csel x1, xzr, [[LO_BIG_SHIFT]], ge
; CHECK: ret
%shr = lshr i128 %r, %s
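The net effect on the i128 shifts above: the (64 - s) reverse-shift amount
becomes a plain NEG, and one variable shift now serves both the s < 64 and
s >= 64 paths because the hardware reduces the amount mod 64. A rough C++
model of the updated shl CHECK lines (structure inferred from the test,
not taken from LLVM's lowering code):

  #include <cstdint>

  struct u128 { uint64_t lo, hi; };

  u128 shl128(u128 v, uint64_t s) { // assumes 0 <= s < 128
    // neg + lsr + csel: low bits spilling into the high word (0 if s == 0)
    uint64_t lo_for_hi = s % 64 ? v.lo >> ((64 - s) % 64) : 0;
    uint64_t hi_normal = (v.hi << (s % 64)) | lo_for_hi; // lsl + orr
    uint64_t big_shift = v.lo << (s % 64);               // the shared lsl
    u128 r;
    r.hi = s >= 64 ? big_shift : hi_normal; // csel on (s - 64) >= 0
    r.lo = s >= 64 ? 0 : big_shift;         // csel reusing the same lsl
    return r;
  }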
Added: llvm/trunk/test/CodeGen/AArch64/shift-mod.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/shift-mod.ll?rev=333214&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/shift-mod.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/shift-mod.ll Thu May 24 11:29:42 2018
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+; Check that we optimize out AND instructions and ADD/SUB instructions
+; modulo the shift size to take advantage of the implicit mod done on
+; the shift amount value by the variable shift/rotate instructions.
+
+define i32 @test1(i32 %x, i64 %y) {
+; CHECK-LABEL: test1:
+; CHECK-NOT: and
+; CHECK: lsr
+ %sh_prom = trunc i64 %y to i32
+ %shr = lshr i32 %x, %sh_prom
+ ret i32 %shr
+}
+
+define i64 @test2(i32 %x, i64 %y) {
+; CHECK-LABEL: test2:
+; CHECK-NOT: orr
+; CHECK-NOT: sub
+; CHECK: neg
+; CHECK: asr
+ %sub9 = sub nsw i32 64, %x
+ %sh_prom12.i = zext i32 %sub9 to i64
+ %shr.i = ashr i64 %y, %sh_prom12.i
+ ret i64 %shr.i
+}
+
+define i64 @test3(i64 %x, i64 %y) {
+; CHECK-LABEL: test3:
+; CHECK-NOT: add
+; CHECK: lsl
+ %add = add nsw i64 64, %x
+ %shl = shl i64 %y, %add
+ ret i64 %shl
+}
\ No newline at end of file
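For reference, test2 above corresponds roughly to the following C++ (an
approximation of the IR; the explicit mod keeps the C++ shift in range,
which the hardware shift performs implicitly):

  #include <cstdint>

  int64_t test2_equiv(int32_t x, int64_t y) {
    uint64_t amt = static_cast<uint32_t>(64 - x); // zext(sub 64, x)
    // Expected codegen: neg w8, w0 ; asr x0, x1, x8 (no orr/sub of #64)
    return y >> (amt % 64);
  }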