[llvm] r347965 - [ARM] Don't expand sdiv when optimising for minsize
Sjoerd Meijer via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 30 00:14:29 PST 2018
Author: sjoerdmeijer
Date: Fri Nov 30 00:14:28 2018
New Revision: 347965
URL: http://llvm.org/viewvc/llvm-project?rev=347965&view=rev
Log:
[ARM] Don't expand sdiv when optimising for minsize
Don't expand SDIV with an immediate that is a power of 2 if we optimise for
minimum code size. For example:
sdiv %1, i32 4
gets expanded to a sequence of 3 instructions, but this is suboptimal for
minimum code size, so instead we just generate a MOV and an SDIV if integer
division is supported.
Differential Revision: https://reviews.llvm.org/D54546
Added:
llvm/trunk/test/CodeGen/ARM/sdiv-pow2-arm-size.ll
llvm/trunk/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=347965&r1=347964&r2=347965&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri Nov 30 00:14:28 2018
@@ -7794,6 +7794,50 @@ SDValue ARMTargetLowering::LowerWindowsD
return LowerCallTo(CLI).first;
}
+// Code size optimisation for SDIV by a power-of-2 immediate. Returns the
+// original SDIV node (N) to tell DAGCombiner to keep the division as it is,
+// or an empty SDValue to let DAGCombiner expand it into the usual
+// multi-instruction sequence. 'Created' is not used here.
+SDValue
+ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // Only SDIV is handled so far. TODO: Support SREM
+ if (N->getOpcode() != ISD::SDIV)
+ return SDValue();
+
+ const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
+ const auto &MF = DAG.getMachineFunction();
+ const bool MinSize = MF.getFunction().optForMinSize();
+ const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
+ : ST.hasDivideInARMMode();
+
+ // Leave vector types alone; keeping the node here may lead to the
+ // integer divides being scalarized.
+ if (N->getOperand(0).getValueType().isVector())
+ return SDValue();
+
+ // Only worthwhile when optimising for minimum size and the current mode
+ // (ARM or Thumb) has a hardware divide instruction; otherwise bail.
+ if (!(MinSize && HasDivide))
+ return SDValue();
+
+ // ARM mode is a bit simpler than Thumb: even a large power-of-2 immediate
+ // can be materialised with a single MOV, so no further checks are needed;
+ // just return the sdiv node.
+ if (!ST.isThumb())
+ return SDValue(N, 0);
+
+ // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
+ // and thus lose the code size benefit of a MOVS, which needs only 2 bytes.
+ // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
+ // but as it's doing exactly this, it's not worth the trouble to get TTI.
+ if (Divisor.sgt(128))
+ return SDValue();
+
+ return SDValue(N, 0);
+}
+
SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
assert(Op.getValueType() == MVT::i32 &&
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=347965&r1=347964&r2=347965&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Fri Nov 30 00:14:28 2018
@@ -694,6 +694,9 @@ class VectorType;
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const override;
+
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is
Added: llvm/trunk/test/CodeGen/ARM/sdiv-pow2-arm-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/sdiv-pow2-arm-size.ll?rev=347965&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/sdiv-pow2-arm-size.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/sdiv-pow2-arm-size.ll Fri Nov 30 00:14:28 2018
@@ -0,0 +1,79 @@
+; RUN: llc -mtriple=armv7a -mattr=+hwdiv-arm %s -o - | FileCheck %s --check-prefixes=CHECK,DIV
+; RUN: llc -mtriple=armv7a -mattr=-hwdiv-arm %s -o - | FileCheck %s --check-prefixes=CHECK,NODIV
+
+; Check that SREM is not affected: it is still expanded, even with minsize.
+define dso_local i32 @test_rem(i32 %F) local_unnamed_addr #0 {
+; CHECK-LABEL: test_rem
+; CHECK: asr r1, r0, #31
+; CHECK-NEXT: add r1, r0, r1, lsr #30
+; CHECK-NEXT: bic r1, r1, #3
+; CHECK-NEXT: sub r0, r0, r1
+
+entry:
+ %div = srem i32 %F, 4
+ ret i32 %div
+}
+
+; Try an i16 sdiv, with a small immediate.
+define dso_local signext i16 @f0(i16 signext %F) local_unnamed_addr #0 {
+; CHECK-LABEL: f0
+
+; DIV: mov r1, #2
+; DIV-NEXT: sdiv r0, r0, r1
+; DIV-NEXT: sxth r0, r0
+; DIV-NEXT: bx lr
+
+; NODIV: uxth r1, r0
+; NODIV-NEXT: add r0, r0, r1, lsr #15
+; NODIV-NEXT: sxth r0, r0
+; NODIV-NEXT: asr r0, r0, #1
+; NODIV-NEXT: bx lr
+
+entry:
+ %0 = sdiv i16 %F, 2
+ ret i16 %0
+}
+
+; Try an i32 sdiv, with a small immediate.
+define dso_local i32 @f1(i32 %F) local_unnamed_addr #0 {
+; CHECK-LABEL: f1
+
+; DIV: mov r1, #4
+; DIV-NEXT: sdiv r0, r0, r1
+; DIV-NEXT: bx lr
+
+; NODIV: asr r1, r0, #31
+; NODIV-NEXT: add r0, r0, r1, lsr #30
+; NODIV-NEXT: asr r0, r0, #2
+; NODIV-NEXT: bx lr
+
+entry:
+ %div = sdiv i32 %F, 4
+ ret i32 %div
+}
+
+; Try a large power of 2 immediate, which should also be materialised with 1
+; move immediate instruction.
+define dso_local i32 @f2(i32 %F) local_unnamed_addr #0 {
+; CHECK-LABEL: f2
+; DIV: mov r1, #131072
+; DIV-NEXT: sdiv r0, r0, r1
+; DIV-NEXT: bx lr
+entry:
+ %div = sdiv i32 %F, 131072
+ ret i32 %div
+}
+
+; MinSize not set, so should expand to the faster but longer sequence.
+define dso_local i32 @f3(i32 %F) {
+; CHECK-LABEL: f3
+; CHECK: asr r1, r0, #31
+; CHECK-NEXT: add r0, r0, r1, lsr #30
+; CHECK-NEXT: asr r0, r0, #2
+; CHECK-NEXT: bx lr
+entry:
+ %div = sdiv i32 %F, 4
+ ret i32 %div
+}
+
+attributes #0 = { minsize norecurse nounwind optsize readnone }
Added: llvm/trunk/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll?rev=347965&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll Fri Nov 30 00:14:28 2018
@@ -0,0 +1,105 @@
+; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s --check-prefixes=CHECK,T2
+; RUN: llc -mtriple=thumbv8m.main %s -o - | FileCheck %s --check-prefixes=CHECK,T2
+; RUN: llc -mtriple=thumbv8m.base %s -o - | FileCheck %s --check-prefixes=CHECK,T1
+; RUN: llc -mtriple=thumbv7em %s -o - | FileCheck %s --check-prefixes=CHECK,T2
+; RUN: llc -mtriple=thumbv6m %s -o - | FileCheck %s --check-prefixes=V6M
+
+; Armv6m targets don't have a sdiv instruction, so sdiv should not appear at
+; all in the output:
+
+; V6M: .file {{.*}}
+; V6M-NOT: sdiv
+; V6M-NOT: idiv
+
+; Test sdiv i16
+define dso_local signext i16 @f0(i16 signext %F) local_unnamed_addr #0 {
+; CHECK-LABEL: f0
+; CHECK: movs r1, #2
+; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: sxth r0, r0
+; CHECK-NEXT: bx lr
+
+entry:
+ %0 = sdiv i16 %F, 2
+ ret i16 %0
+}
+
+; Same as above, but now with i32
+define dso_local i32 @f1(i32 %F) local_unnamed_addr #0 {
+; CHECK-LABEL: f1
+; CHECK: movs r1, #4
+; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: bx lr
+
+entry:
+ %div = sdiv i32 %F, 4
+ ret i32 %div
+}
+
+; The immediate is not a power of 2, so we expect a sdiv.
+define dso_local i32 @f2(i32 %F) local_unnamed_addr #0 {
+; CHECK-LABEL: f2
+; CHECK: movs r1, #5
+; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: bx lr
+
+entry:
+ %div = sdiv i32 %F, 5
+ ret i32 %div
+}
+
+; Try a larger power of 2 immediate: immediates larger than
+; 128 don't give any code size savings.
+define dso_local i32 @f3(i32 %F) local_unnamed_addr #0 {
+; CHECK-LABEL: f3
+; CHECK-NOT: sdiv
+entry:
+ %div = sdiv i32 %F, 256
+ ret i32 %div
+}
+
+attributes #0 = { minsize norecurse nounwind optsize readnone }
+
+
+; These functions don't have the minsize attribute set, so the sdiv should not
+; be lowered to an sdiv instruction, but to the faster instruction sequence.
+
+define dso_local signext i16 @f4(i16 signext %F) {
+; T2-LABEL: f4
+; T2: uxth r1, r0
+; T2-NEXT: add.w r0, r0, r1, lsr #15
+; T2-NEXT: sxth r0, r0
+; T2-NEXT: asrs r0, r0, #1
+; T2-NEXT: bx lr
+
+; T1-LABEL: f4
+; T1: uxth r1, r0
+; T1-NEXT: lsrs r1, r1, #15
+; T1-NEXT: adds r0, r0, r1
+; T1-NEXT: sxth r0, r0
+; T1-NEXT: asrs r0, r0, #1
+; T1-NEXT: bx lr
+
+entry:
+ %0 = sdiv i16 %F, 2
+ ret i16 %0
+}
+
+define dso_local i32 @f5(i32 %F) {
+; T2-LABEL: f5
+; T2: asrs r1, r0, #31
+; T2-NEXT: add.w r0, r0, r1, lsr #30
+; T2-NEXT: asrs r0, r0, #2
+; T2-NEXT: bx lr
+
+; T1-LABEL: f5
+; T1: asrs r1, r0, #31
+; T1-NEXT: lsrs r1, r1, #30
+; T1-NEXT: adds r0, r0, r1
+; T1-NEXT: asrs r0, r0, #2
+; T1-NEXT: bx lr
+
+entry:
+ %div = sdiv i32 %F, 4
+ ret i32 %div
+}
More information about the llvm-commits
mailing list