[llvm] r215230 - [FastISel][X86] Use INC/DEC when possible for {sadd|ssub}.with.overflow intrinsics.
Juergen Ributzka
juergen at apple.com
Fri Aug 8 10:21:37 PDT 2014
Author: ributzka
Date: Fri Aug 8 12:21:37 2014
New Revision: 215230
URL: http://llvm.org/viewvc/llvm-project?rev=215230&view=rev
Log:
[FastISel][X86] Use INC/DEC when possible for {sadd|ssub}.with.overflow intrinsics.
This is a small peephole optimization to emit INC/DEC when possible.
Fixes <rdar://problem/17952308>.
Modified:
llvm/trunk/lib/Target/X86/X86FastISel.cpp
llvm/trunk/test/CodeGen/X86/xaluo.ll
Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=215230&r1=215229&r2=215230&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Fri Aug 8 12:21:37 2014
@@ -2374,15 +2374,19 @@ bool X86FastISel::FastLowerIntrinsicCall
isCommutativeIntrinsic(II))
std::swap(LHS, RHS);
+ bool UseIncDec = false;
+ if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
+ UseIncDec = true;
+
unsigned BaseOpc, CondOpc;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
- BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
+ BaseOpc = UseIncDec ? X86ISD::INC : ISD::ADD; CondOpc = X86::SETOr; break;
case Intrinsic::uadd_with_overflow:
BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
case Intrinsic::ssub_with_overflow:
- BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
+ BaseOpc = UseIncDec ? X86ISD::DEC : ISD::SUB; CondOpc = X86::SETOr; break;
case Intrinsic::usub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
case Intrinsic::smul_with_overflow:
@@ -2398,9 +2402,24 @@ bool X86FastISel::FastLowerIntrinsicCall
unsigned ResultReg = 0;
// Check if we have an immediate version.
- if (auto const *C = dyn_cast<ConstantInt>(RHS)) {
- ResultReg = FastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
- C->getZExtValue());
+ if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
+ static const unsigned Opc[2][2][4] = {
+ { { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
+ { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r } },
+ { { X86::INC8r, X86::INC64_16r, X86::INC64_32r, X86::INC64r },
+ { X86::DEC8r, X86::DEC64_16r, X86::DEC64_32r, X86::DEC64r } }
+ };
+
+ if (UseIncDec) {
+ ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsDec = BaseOpc == X86ISD::DEC;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc[Is64Bit][IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ } else
+ ResultReg = FastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
+ CI->getZExtValue());
}
unsigned RHSReg;
Modified: llvm/trunk/test/CodeGen/X86/xaluo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xaluo.ll?rev=215230&r1=215229&r2=215230&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xaluo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xaluo.ll Fri Aug 8 12:21:37 2014
@@ -67,16 +67,48 @@ entry:
ret i1 %obit
}
-; SADDO reg, imm | imm, reg
-; FIXME: INC isn't supported in FastISel yet
-define zeroext i1 @saddo.i64imm1(i64 %v1, i64* %res) {
+; SADDO reg, 1 | INC
+define zeroext i1 @saddo.inc.i8(i8 %v1, i8* %res) {
entry:
-; DAG-LABEL: saddo.i64imm1
-; DAG: incq %rdi
-; DAG-NEXT: seto %al
-; FAST-LABEL: saddo.i64imm1
-; FAST: addq $1, %rdi
-; FAST-NEXT: seto %al
+; CHECK-LABEL: saddo.inc.i8
+; CHECK: incb %dil
+; CHECK-NEXT: seto %al
+ %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %v1, i8 1)
+ %val = extractvalue {i8, i1} %t, 0
+ %obit = extractvalue {i8, i1} %t, 1
+ store i8 %val, i8* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @saddo.inc.i16(i16 %v1, i16* %res) {
+entry:
+; CHECK-LABEL: saddo.inc.i16
+; CHECK: incw %di
+; CHECK-NEXT: seto %al
+ %t = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %v1, i16 1)
+ %val = extractvalue {i16, i1} %t, 0
+ %obit = extractvalue {i16, i1} %t, 1
+ store i16 %val, i16* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @saddo.inc.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: saddo.inc.i32
+; CHECK: incl %edi
+; CHECK-NEXT: seto %al
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 1)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @saddo.inc.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: saddo.inc.i64
+; CHECK: incq %rdi
+; CHECK-NEXT: seto %al
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 1)
%val = extractvalue {i64, i1} %t, 0
%obit = extractvalue {i64, i1} %t, 1
@@ -84,17 +116,18 @@ entry:
ret i1 %obit
}
+; SADDO reg, imm | imm, reg
; FIXME: DAG doesn't optimize immediates on the LHS.
-define zeroext i1 @saddo.i64imm2(i64 %v1, i64* %res) {
+define zeroext i1 @saddo.i64imm1(i64 %v1, i64* %res) {
entry:
-; DAG-LABEL: saddo.i64imm2
+; DAG-LABEL: saddo.i64imm1
; DAG: mov
; DAG-NEXT: addq
; DAG-NEXT: seto
-; FAST-LABEL: saddo.i64imm2
-; FAST: addq $1, %rdi
+; FAST-LABEL: saddo.i64imm1
+; FAST: addq $2, %rdi
; FAST-NEXT: seto %al
- %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 1, i64 %v1)
+ %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 2, i64 %v1)
%val = extractvalue {i64, i1} %t, 0
%obit = extractvalue {i64, i1} %t, 1
store i64 %val, i64* %res
@@ -102,12 +135,12 @@ entry:
}
; Check boundary conditions for large immediates.
-define zeroext i1 @saddo.i64imm3(i64 %v1, i64* %res) {
+define zeroext i1 @saddo.i64imm2(i64 %v1, i64* %res) {
entry:
-; DAG-LABEL: saddo.i64imm3
+; DAG-LABEL: saddo.i64imm2
; DAG: addq $-2147483648, %rdi
; DAG-NEXT: seto %al
-; FAST-LABEL: saddo.i64imm3
+; FAST-LABEL: saddo.i64imm2
; FAST: addq $-2147483648, %rdi
; FAST-NEXT: seto %al
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -2147483648)
@@ -117,13 +150,13 @@ entry:
ret i1 %obit
}
-define zeroext i1 @saddo.i64imm4(i64 %v1, i64* %res) {
+define zeroext i1 @saddo.i64imm3(i64 %v1, i64* %res) {
entry:
-; DAG-LABEL: saddo.i64imm4
+; DAG-LABEL: saddo.i64imm3
; DAG: movabsq $-21474836489, %[[REG:[a-z]+]]
; DAG-NEXT: addq %rdi, %[[REG]]
; DAG-NEXT: seto
-; FAST-LABEL: saddo.i64imm4
+; FAST-LABEL: saddo.i64imm3
; FAST: movabsq $-21474836489, %[[REG:[a-z]+]]
; FAST-NEXT: addq %rdi, %[[REG]]
; FAST-NEXT: seto
@@ -134,12 +167,12 @@ entry:
ret i1 %obit
}
-define zeroext i1 @saddo.i64imm5(i64 %v1, i64* %res) {
+define zeroext i1 @saddo.i64imm4(i64 %v1, i64* %res) {
entry:
-; DAG-LABEL: saddo.i64imm5
+; DAG-LABEL: saddo.i64imm4
; DAG: addq $2147483647, %rdi
; DAG-NEXT: seto
-; FAST-LABEL: saddo.i64imm5
+; FAST-LABEL: saddo.i64imm4
; FAST: addq $2147483647, %rdi
; FAST-NEXT: seto
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 2147483647)
@@ -150,13 +183,13 @@ entry:
}
; TODO: FastISel shouldn't use movabsq.
-define zeroext i1 @saddo.i64imm6(i64 %v1, i64* %res) {
+define zeroext i1 @saddo.i64imm5(i64 %v1, i64* %res) {
entry:
-; DAG-LABEL: saddo.i64imm6
+; DAG-LABEL: saddo.i64imm5
; DAG: movl $2147483648, %ecx
; DAG: addq %rdi, %rcx
; DAG-NEXT: seto
-; FAST-LABEL: saddo.i64imm6
+; FAST-LABEL: saddo.i64imm5
; FAST: movabsq $2147483648, %[[REG:[a-z]+]]
; FAST: addq %rdi, %[[REG]]
; FAST-NEXT: seto
More information about the llvm-commits
mailing list