[llvm] r217986 - [FastISel][AArch64] Custom lower sdiv by power-of-2.
Juergen Ributzka
juergen at apple.com
Wed Sep 17 14:55:55 PDT 2014
Author: ributzka
Date: Wed Sep 17 16:55:55 2014
New Revision: 217986
URL: http://llvm.org/viewvc/llvm-project?rev=217986&view=rev
Log:
[FastISel][AArch64] Custom lower sdiv by power-of-2.
Emit an optimized instruction sequence for sdiv by a power of 2, depending on
whether the exact flag is set.
This fixes rdar://problem/18224511.
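For context, the non-exact case lowers sdiv-by-2^k to the usual add/cmp/csel/asr
sequence that rounds toward zero, with an extra negate when the divisor is
negative. Below is a minimal C++ sketch of the scalar math that sequence
computes; the helper name sdiv_pow2 and its parameters are illustrative only
and not part of the patch (it also assumes arithmetic right shift on negative
values, which holds on the targets in question):

  #include <cassert>
  #include <cstdint>

  // Round-toward-zero signed division by +/- 2^lg2, mirroring the emitted
  // add / cmp+csel / asr / (neg) sequence. Illustrative helper only.
  int64_t sdiv_pow2(int64_t x, unsigned lg2, bool divisor_is_negative) {
    int64_t bias = (int64_t(1) << lg2) - 1;           // 2^lg2 - 1
    int64_t adjusted = (x < 0) ? x + bias : x;        // add; cmp; csel ..., lt
    int64_t shifted = adjusted >> lg2;                // asr #lg2
    return divisor_is_negative ? -shifted : shifted;  // neg ..., asr #lg2
  }

  int main() {
    assert(sdiv_pow2(-9, 3, false) == -9 / 8);  // -1: rounds toward zero, not -2
    assert(sdiv_pow2(9, 3, true) == 9 / -8);    // -1
    return 0;
  }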
Added:
llvm/trunk/test/CodeGen/AArch64/fast-isel-sdiv.ll
Modified:
llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=217986&r1=217985&r2=217986&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Wed Sep 17 16:55:55 2014
@@ -133,6 +133,7 @@ private:
bool selectShift(const Instruction *I);
bool selectBitCast(const Instruction *I);
bool selectFRem(const Instruction *I);
+ bool selectSDiv(const Instruction *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
@@ -3980,6 +3981,75 @@ bool AArch64FastISel::selectFRem(const I
return true;
}
+bool AArch64FastISel::selectSDiv(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ if (!isa<ConstantInt>(I->getOperand(1)))
+ return selectBinaryOp(I, ISD::SDIV);
+
+ const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
+ if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
+ !(C.isPowerOf2() || (-C).isPowerOf2()))
+ return selectBinaryOp(I, ISD::SDIV);
+
+ unsigned Lg2 = C.countTrailingZeros();
+ unsigned Src0Reg = getRegForValue(I->getOperand(0));
+ if (!Src0Reg)
+ return false;
+ bool Src0IsKill = hasTrivialKill(I->getOperand(0));
+
+ if (cast<BinaryOperator>(I)->isExact()) {
+ unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
+ if (!ResultReg)
+ return false;
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ uint64_t Pow2MinusOne = (uint64_t(1) << Lg2) - 1;
+ unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
+ /*IsKill=*/false, Pow2MinusOne);
+ if (!AddReg)
+ return false;
+
+ // (Src0 < 0) ? Pow2 - 1 : 0;
+ if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
+ return false;
+
+ unsigned SelectOpc;
+ const TargetRegisterClass *RC;
+ if (VT == MVT::i64) {
+ SelectOpc = AArch64::CSELXr;
+ RC = &AArch64::GPR64RegClass;
+ } else {
+ SelectOpc = AArch64::CSELWr;
+ RC = &AArch64::GPR32RegClass;
+ }
+ unsigned SelectReg =
+ fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
+ Src0IsKill, AArch64CC::LT);
+ if (!SelectReg)
+ return false;
+
+ // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
+ // negate the result.
+ unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+ unsigned ResultReg;
+ if (C.isNegative())
+ ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
+ SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
+ else
+ ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
@@ -3989,6 +4059,8 @@ bool AArch64FastISel::fastSelectInstruct
return selectAddSub(I);
case Instruction::Mul:
return selectMul(I);
+ case Instruction::SDiv:
+ return selectSDiv(I);
case Instruction::SRem:
if (!selectBinaryOp(I, ISD::SREM))
return selectRem(I, ISD::SREM);
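Worth noting: the exact path above is a single asr because "sdiv exact"
promises the dividend is a multiple of the divisor, so no rounding bias is
needed; the csel-based bias only matters for non-multiples. A tiny sketch with
hypothetical values (not part of the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    int64_t x = -64;                       // exact multiple of 8, as 'exact' promises
    assert((x >> 3) == x / 8);             // a lone asr already gives the quotient (-8)
    assert((int64_t(-9) >> 3) != -9 / 8);  // non-multiple: asr floors (-2), sdiv truncates (-1)
    return 0;
  }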
Added: llvm/trunk/test/CodeGen/AArch64/fast-isel-sdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-sdiv.ll?rev=217986&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fast-isel-sdiv.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/fast-isel-sdiv.ll Wed Sep 17 16:55:55 2014
@@ -0,0 +1,56 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @sdiv_i32_exact(i32 %a) {
+; CHECK-LABEL: sdiv_i32_exact
+; CHECK: asr {{w[0-9]+}}, w0, #3
+ %1 = sdiv exact i32 %a, 8
+ ret i32 %1
+}
+
+define i32 @sdiv_i32_pos(i32 %a) {
+; CHECK-LABEL: sdiv_i32_pos
+; CHECK: add [[REG1:w[0-9]+]], w0, #7
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
+; CHECK-NEXT: asr {{w[0-9]+}}, [[REG2]], #3
+ %1 = sdiv i32 %a, 8
+ ret i32 %1
+}
+
+define i32 @sdiv_i32_neg(i32 %a) {
+; CHECK-LABEL: sdiv_i32_neg
+; CHECK: add [[REG1:w[0-9]+]], w0, #7
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
+; CHECK-NEXT: neg {{w[0-9]+}}, [[REG2]], asr #3
+ %1 = sdiv i32 %a, -8
+ ret i32 %1
+}
+
+define i64 @sdiv_i64_exact(i64 %a) {
+; CHECK-LABEL: sdiv_i64_exact
+; CHECK: asr {{x[0-9]+}}, x0, #4
+ %1 = sdiv exact i64 %a, 16
+ ret i64 %1
+}
+
+define i64 @sdiv_i64_pos(i64 %a) {
+; CHECK-LABEL: sdiv_i64_pos
+; CHECK: add [[REG1:x[0-9]+]], x0, #15
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
+; CHECK-NEXT: asr {{x[0-9]+}}, [[REG2]], #4
+ %1 = sdiv i64 %a, 16
+ ret i64 %1
+}
+
+define i64 @sdiv_i64_neg(i64 %a) {
+; CHECK-LABEL: sdiv_i64_neg
+; CHECK: add [[REG1:x[0-9]+]], x0, #15
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
+; CHECK-NEXT: neg {{x[0-9]+}}, [[REG2]], asr #4
+ %1 = sdiv i64 %a, -16
+ ret i64 %1
+}