[llvm] r216242 - [FastISel][AArch64] Add support for variable shift.
Juergen Ributzka
juergen at apple.com
Thu Aug 21 16:06:08 PDT 2014
Author: ributzka
Date: Thu Aug 21 18:06:07 2014
New Revision: 216242
URL: http://llvm.org/viewvc/llvm-project?rev=216242&view=rev
Log:
[FastISel][AArch64] Add support for variable shift.
This adds the missing variable shift support for the value types i8, i16, and i32.
This fixes <rdar://problem/18095685>.
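For context: AArch64 only has variable-shift instructions at 32 and 64 bits
(LSLV/LSRV/ASRV), so the new *_rr emitters below legalize i8 and i16 by
masking the shift amount and truncating the result (for LSL the left operand
needs no mask, since the final AND discards the high bits anyway). Here is a
minimal C++ model of the i8 shift-left sequence this patch emits; this is
plain C++ standing in for the generated machine code, the function name is
illustrative only, and the explicit mod-32 mask models AArch64's
variable-shift semantics:

  #include <cstdint>

  // The i8 values live in the low 8 bits of 32-bit registers; the upper
  // 24 bits are undefined on entry.
  uint32_t lslv_i8_model(uint32_t aReg, uint32_t bReg) {
    uint32_t amt = bReg & 0xff;        // and  wA, w1, #0xff
    uint32_t res = aReg << (amt & 31); // lsl  wR, w0, wA (LSLVWr shifts mod 32)
    return res & 0xff;                 // and  w0, wR, #0xff
  }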
Modified:
llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
llvm/trunk/test/CodeGen/AArch64/fast-isel-shift.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=216242&r1=216241&r2=216242&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Thu Aug 21 18:06:07 2014
@@ -128,7 +128,7 @@ private:
bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
bool SelectMul(const Instruction *I);
- bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
+ bool SelectShift(const Instruction *I);
bool SelectBitCast(const Instruction *I);
// Utility helper routines.
@@ -193,9 +193,15 @@ private:
unsigned Op1, bool Op1IsKill);
unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
- unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
- unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
- unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
+ unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill);
+ unsigned emitLSL_ri(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, uint64_t Imm);
+ unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill);
+ unsigned emitLSR_ri(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, uint64_t Imm);
+ unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill);
+ unsigned emitASR_ri(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, uint64_t Imm);
unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
@@ -703,8 +709,8 @@ bool AArch64FastISel::SimplifyAddress(Ad
Addr.getOffsetReg(), /*TODO:IsKill=*/false,
Addr.getShift());
else
- ResultReg = Emit_LSL_ri(MVT::i64, Addr.getOffsetReg(),
- /*Op0IsKill=*/false, Addr.getShift());
+ ResultReg = emitLSL_ri(MVT::i64, Addr.getOffsetReg(), /*Op0IsKill=*/false,
+ Addr.getShift());
if (!ResultReg)
return false;
@@ -2366,7 +2372,7 @@ bool AArch64FastISel::FastLowerIntrinsic
if (VT == MVT::i32) {
MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
- unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
+ unsigned ShiftReg = emitLSR_ri(MVT::i64, MulReg, /*IsKill=*/false, 32);
MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
AArch64::sub_32);
ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
@@ -2653,8 +2659,34 @@ unsigned AArch64FastISel::Emit_UMULL_rr(
AArch64::XZR, /*IsKill=*/true);
}
-unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
- uint64_t Shift) {
+unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill) {
+ unsigned Opc = 0;
+ bool NeedTrunc = false;
+ uint64_t Mask = 0;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
+ case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
+ case MVT::i32: Opc = AArch64::LSLVWr; break;
+ case MVT::i64: Opc = AArch64::LSLVXr; break;
+ }
+
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ if (NeedTrunc) {
+ Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+ Op1IsKill = true;
+ }
+ unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
+ Op1IsKill);
+ if (NeedTrunc)
+ ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ uint64_t Shift) {
unsigned Opc, ImmR, ImmS;
switch (RetVT.SimpleTy) {
default: return 0;
@@ -2673,8 +2705,35 @@ unsigned AArch64FastISel::Emit_LSL_ri(MV
return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
-unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
- uint64_t Shift) {
+unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill) {
+ unsigned Opc = 0;
+ bool NeedTrunc = false;
+ uint64_t Mask = 0;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
+ case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
+ case MVT::i32: Opc = AArch64::LSRVWr; break;
+ case MVT::i64: Opc = AArch64::LSRVXr; break;
+ }
+
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ if (NeedTrunc) {
+ Op0Reg = emitAND_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
+ Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+ Op0IsKill = Op1IsKill = true;
+ }
+ unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
+ Op1IsKill);
+ if (NeedTrunc)
+ ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ uint64_t Shift) {
unsigned Opc, ImmS;
switch (RetVT.SimpleTy) {
default: return 0;
@@ -2689,8 +2748,35 @@ unsigned AArch64FastISel::Emit_LSR_ri(MV
return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, Shift, ImmS);
}
-unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
- uint64_t Shift) {
+unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill) {
+ unsigned Opc = 0;
+ bool NeedTrunc = false;
+ uint64_t Mask = 0;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
+ case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
+ case MVT::i32: Opc = AArch64::ASRVWr; break;
+ case MVT::i64: Opc = AArch64::ASRVXr; break;
+ }
+
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ if (NeedTrunc) {
+ Op0Reg = EmitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
+ Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+ Op0IsKill = Op1IsKill = true;
+ }
+ unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
+ Op1IsKill);
+ if (NeedTrunc)
+ ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ uint64_t Shift) {
unsigned Opc, ImmS;
switch (RetVT.SimpleTy) {
default: return 0;
@@ -2892,31 +2978,56 @@ bool AArch64FastISel::SelectMul(const In
return true;
}
-bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
- bool IsArithmetic) {
+bool AArch64FastISel::SelectShift(const Instruction *I) {
EVT RetEVT = TLI.getValueType(I->getType(), true);
if (!RetEVT.isSimple())
return false;
MVT RetVT = RetEVT.getSimpleVT();
- if (!isa<ConstantInt>(I->getOperand(1)))
- return false;
-
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (!Op0Reg)
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
- uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();
+ if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ unsigned ResultReg = 0;
+ uint64_t ShiftVal = C->getZExtValue();
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unexpected instruction.");
+ case Instruction::Shl:
+ ResultReg = emitLSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
+ break;
+ case Instruction::AShr:
+ ResultReg = emitASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
+ break;
+ case Instruction::LShr:
+ ResultReg = emitLSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
+ break;
+ }
+ if (!ResultReg)
+ return false;
- unsigned ResultReg;
- if (IsLeftShift)
- ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
- else {
- if (IsArithmetic)
- ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
- else
- ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (!Op1Reg)
+ return false;
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg = 0;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unexpected instruction.");
+ case Instruction::Shl:
+ ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ break;
+ case Instruction::AShr:
+ ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ break;
+ case Instruction::LShr:
+ ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ break;
}
if (!ResultReg)
@@ -3012,12 +3123,10 @@ bool AArch64FastISel::TargetSelectInstru
// selector -> improve FastISel tblgen.
case Instruction::Mul:
return SelectMul(I);
- case Instruction::Shl:
- return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false);
- case Instruction::LShr:
- return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
+ case Instruction::Shl: // fall-through
+ case Instruction::LShr: // fall-through
case Instruction::AShr:
- return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
+ return SelectShift(I);
case Instruction::BitCast:
return SelectBitCast(I);
}
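A note on the emitters above: for logical right shifts the high bits of the
left operand are cleared with an AND, but emitASR_rr instead sign-extends it
(EmitIntExt with IsZExt=false, which becomes sxtb/sxth), because an
arithmetic shift must pull in copies of the i8/i16 sign bit rather than
zeros. A C++ model of the resulting ashr-i8 sequence; again plain C++ with
an illustrative name, not code from the patch:

  #include <cstdint>

  // Sign-extend the value, mask the amount, arithmetic-shift, then
  // truncate back to 8 bits.
  uint32_t asrv_i8_model(uint32_t aReg, uint32_t bReg) {
    int32_t  lhs = (int32_t)(int8_t)(aReg & 0xff); // sxtb wL, w0
    uint32_t amt = bReg & 0xff;                    // and  wA, w1, #0xff
    int32_t  res = lhs >> (amt & 31);              // asr  wR, wL, wA (ASRVWr)
    return (uint32_t)res & 0xff;                   // and  w0, wR, #0xff
  }

(The signed right shift is sign-filling on all mainstream compilers and is
guaranteed to be so since C++20.)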
Modified: llvm/trunk/test/CodeGen/AArch64/fast-isel-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-shift.ll?rev=216242&r1=216241&r2=216242&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fast-isel-shift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fast-isel-shift.ll Thu Aug 21 18:06:07 2014
@@ -1,87 +1,187 @@
; RUN: llc -fast-isel -fast-isel-abort -mtriple=arm64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
+; CHECK-LABEL: lslv_i8
+; CHECK: and [[REG1:w[0-9]+]], w1, #0xff
+; CHECK-NEXT: lsl [[REG2:w[0-9]+]], w0, [[REG1]]
+; CHECK-NEXT: and {{w[0-9]+}}, [[REG2]], #0xff
+define zeroext i8 @lslv_i8(i8 %a, i8 %b) {
+ %1 = shl i8 %a, %b
+ ret i8 %1
+}
+
; CHECK-LABEL: lsl_i8
-; CHECK: ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+; CHECK: ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
define zeroext i8 @lsl_i8(i8 %a) {
%1 = shl i8 %a, 4
ret i8 %1
}
+; CHECK-LABEL: lslv_i16
+; CHECK: and [[REG1:w[0-9]+]], w1, #0xffff
+; CHECK-NEXT: lsl [[REG2:w[0-9]+]], w0, [[REG1]]
+; CHECK-NEXT: and {{w[0-9]+}}, [[REG2]], #0xffff
+define zeroext i16 @lslv_i16(i16 %a, i16 %b) {
+ %1 = shl i16 %a, %b
+ ret i16 %1
+}
+
; CHECK-LABEL: lsl_i16
-; CHECK: ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
+; CHECK: ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
define zeroext i16 @lsl_i16(i16 %a) {
%1 = shl i16 %a, 8
ret i16 %1
}
+; CHECK-LABEL: lslv_i32
+; CHECK: lsl {{w[0-9]*}}, w0, w1
+define zeroext i32 @lslv_i32(i32 %a, i32 %b) {
+ %1 = shl i32 %a, %b
+ ret i32 %1
+}
+
; CHECK-LABEL: lsl_i32
-; CHECK: lsl {{w[0-9]*}}, {{w[0-9]*}}, #16
+; CHECK: lsl {{w[0-9]*}}, {{w[0-9]*}}, #16
define zeroext i32 @lsl_i32(i32 %a) {
%1 = shl i32 %a, 16
ret i32 %1
}
+; CHECK-LABEL: lslv_i64
+; CHECK: lsl {{x[0-9]*}}, x0, x1
+define i64 @lslv_i64(i64 %a, i64 %b) {
+ %1 = shl i64 %a, %b
+ ret i64 %1
+}
+
; FIXME: This shouldn't use the variable shift version.
; CHECK-LABEL: lsl_i64
-; CHECK: lsl {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}}
+; CHECK: lsl {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}}
define i64 @lsl_i64(i64 %a) {
%1 = shl i64 %a, 32
ret i64 %1
}
+; CHECK-LABEL: lsrv_i8
+; CHECK: and [[REG1:w[0-9]+]], w0, #0xff
+; CHECK-NEXT: and [[REG2:w[0-9]+]], w1, #0xff
+; CHECK-NEXT: lsr [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: and {{w[0-9]+}}, [[REG3]], #0xff
+define zeroext i8 @lsrv_i8(i8 %a, i8 %b) {
+ %1 = lshr i8 %a, %b
+ ret i8 %1
+}
+
; CHECK-LABEL: lsr_i8
-; CHECK: ubfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+; CHECK: ubfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
define zeroext i8 @lsr_i8(i8 %a) {
%1 = lshr i8 %a, 4
ret i8 %1
}
+; CHECK-LABEL: lsrv_i16
+; CHECK: and [[REG1:w[0-9]+]], w0, #0xffff
+; CHECK-NEXT: and [[REG2:w[0-9]+]], w1, #0xffff
+; CHECK-NEXT: lsr [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: and {{w[0-9]+}}, [[REG3]], #0xffff
+define zeroext i16 @lsrv_i16(i16 %a, i16 %b) {
+ %1 = lshr i16 %a, %b
+ ret i16 %1
+}
+
; CHECK-LABEL: lsr_i16
-; CHECK: ubfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
+; CHECK: ubfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
define zeroext i16 @lsr_i16(i16 %a) {
%1 = lshr i16 %a, 8
ret i16 %1
}
+; CHECK-LABEL: lsrv_i32
+; CHECK: lsr {{w[0-9]*}}, w0, w1
+define zeroext i32 @lsrv_i32(i32 %a, i32 %b) {
+ %1 = lshr i32 %a, %b
+ ret i32 %1
+}
+
; CHECK-LABEL: lsr_i32
-; CHECK: lsr {{w[0-9]*}}, {{w[0-9]*}}, #16
+; CHECK: lsr {{w[0-9]*}}, {{w[0-9]*}}, #16
define zeroext i32 @lsr_i32(i32 %a) {
%1 = lshr i32 %a, 16
ret i32 %1
}
+; CHECK-LABEL: lsrv_i64
+; CHECK: lsr {{x[0-9]*}}, x0, x1
+define i64 @lsrv_i64(i64 %a, i64 %b) {
+ %1 = lshr i64 %a, %b
+ ret i64 %1
+}
+
; FIXME: This shouldn't use the variable shift version.
; CHECK-LABEL: lsr_i64
-; CHECK: lsr {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}}
+; CHECK: lsr {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}}
define i64 @lsr_i64(i64 %a) {
%1 = lshr i64 %a, 32
ret i64 %1
}
+; CHECK-LABEL: asrv_i8
+; CHECK: sxtb [[REG1:w[0-9]+]], w0
+; CHECK-NEXT: and [[REG2:w[0-9]+]], w1, #0xff
+; CHECK-NEXT: asr [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: and {{w[0-9]+}}, [[REG3]], #0xff
+define zeroext i8 @asrv_i8(i8 %a, i8 %b) {
+ %1 = ashr i8 %a, %b
+ ret i8 %1
+}
+
; CHECK-LABEL: asr_i8
-; CHECK: sbfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+; CHECK: sbfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
define zeroext i8 @asr_i8(i8 %a) {
%1 = ashr i8 %a, 4
ret i8 %1
}
+; CHECK-LABEL: asrv_i16
+; CHECK: sxth [[REG1:w[0-9]+]], w0
+; CHECK-NEXT: and [[REG2:w[0-9]+]], w1, #0xffff
+; CHECK-NEXT: asr [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: and {{w[0-9]+}}, [[REG3]], #0xffff
+define zeroext i16 @asrv_i16(i16 %a, i16 %b) {
+ %1 = ashr i16 %a, %b
+ ret i16 %1
+}
+
; CHECK-LABEL: asr_i16
-; CHECK: sbfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
+; CHECK: sbfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
define zeroext i16 @asr_i16(i16 %a) {
%1 = ashr i16 %a, 8
ret i16 %1
}
+; CHECK-LABEL: asrv_i32
+; CHECK: asr {{w[0-9]*}}, w0, w1
+define zeroext i32 @asrv_i32(i32 %a, i32 %b) {
+ %1 = ashr i32 %a, %b
+ ret i32 %1
+}
+
; CHECK-LABEL: asr_i32
-; CHECK: asr {{w[0-9]*}}, {{w[0-9]*}}, #16
+; CHECK: asr {{w[0-9]*}}, {{w[0-9]*}}, #16
define zeroext i32 @asr_i32(i32 %a) {
%1 = ashr i32 %a, 16
ret i32 %1
}
+; CHECK-LABEL: asrv_i64
+; CHECK: asr {{x[0-9]*}}, x0, x1
+define i64 @asrv_i64(i64 %a, i64 %b) {
+ %1 = ashr i64 %a, %b
+ ret i64 %1
+}
+
; FIXME: This shouldn't use the variable shift version.
; CHECK-LABEL: asr_i64
-; CHECK: asr {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}}
+; CHECK: asr {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}}
define i64 @asr_i64(i64 %a) {
%1 = ashr i64 %a, 32
ret i64 %1
}