[llvm] da79b1e - [SelectionDAG][X86][ARM] Teach ExpandIntRes_ABS to use sra+add+xor expansion when ADDCARRY is supported.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 7 13:15:49 PDT 2020
Author: Craig Topper
Date: 2020-09-07T13:15:26-07:00
New Revision: da79b1eecc65171f6ca0cda9b4f1970bd1503c17
URL: https://github.com/llvm/llvm-project/commit/da79b1eecc65171f6ca0cda9b4f1970bd1503c17
DIFF: https://github.com/llvm/llvm-project/commit/da79b1eecc65171f6ca0cda9b4f1970bd1503c17.diff
LOG: [SelectionDAG][X86][ARM] Teach ExpandIntRes_ABS to use sra+add+xor expansion when ADDCARRY is supported.
Rather than using SELECT instructions, use SRA, UADDO/ADDCARRY and
XORs to expand ABS. This is the multi-part version of the sequence
we use in LegalizeDAG.
It's also the same sequence that the X86 Custom lowering uses for i64 on
32-bit and i128 on 64-bit targets, so we can remove the X86 customization.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D87215
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/Thumb2/mve-abs.ll
llvm/test/CodeGen/X86/abs.ll
llvm/test/CodeGen/X86/iabs.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 77a79a0479ef..e1881c20e5b3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2789,16 +2789,38 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
+ SDValue N0 = N->getOperand(0);
+ GetExpandedInteger(N0, Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ // If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we
+ // use in LegalizeDAG. The ADD part of the expansion is based on
+ // ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that
+ // ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded
+ // if needed. Shift expansion has a special case for filling with sign bits
+ // so that we will only end up with one SRA.
+ bool HasAddCarry = TLI.isOperationLegalOrCustom(
+ ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ if (HasAddCarry) {
+ EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ SDValue Sign =
+ DAG.getNode(ISD::SRA, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
+ SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
+ Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign);
+ Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
+ Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign);
+ Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign);
+ return;
+ }
+
// abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
DAG.getConstant(0, dl, VT), N0);
SDValue NegLo, NegHi;
SplitInteger(Neg, NegLo, NegHi);
- GetExpandedInteger(N0, Lo, Hi);
- EVT NVT = Lo.getValueType();
SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ad8704f686c1..2c7c36325f14 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -193,10 +193,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasCMov()) {
setOperationAction(ISD::ABS , MVT::i16 , Custom);
setOperationAction(ISD::ABS , MVT::i32 , Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
- setOperationAction(ISD::ABS , MVT::i64 , Custom);
- if (Subtarget.is64Bit())
- setOperationAction(ISD::ABS , MVT::i128 , Custom);
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
@@ -29720,31 +29719,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Res);
return;
}
- case ISD::ABS: {
- assert((Subtarget.is64Bit() || N->getValueType(0) == MVT::i64) &&
- "Unexpected type (!= i64) on ABS.");
- assert((!Subtarget.is64Bit() || N->getValueType(0) == MVT::i128) &&
- "Unexpected type (!= i128) on ABS.");
- MVT VT = N->getSimpleValueType(0);
- MVT HalfT = VT == MVT::i128 ? MVT::i64 : MVT::i32;
- SDValue Lo, Hi, Tmp;
- SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
-
- Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
- DAG.getConstant(0, dl, HalfT));
- Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
- DAG.getConstant(1, dl, HalfT));
- Tmp = DAG.getNode(
- ISD::SRA, dl, HalfT, Hi,
- DAG.getShiftAmountConstant(HalfT.getSizeInBits() - 1, HalfT, dl));
- Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
- Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
- SDValue(Lo.getNode(), 1));
- Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
- Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi));
- return;
- }
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
case X86ISD::FMINC:
case X86ISD::FMIN:
diff --git a/llvm/test/CodeGen/Thumb2/mve-abs.ll b/llvm/test/CodeGen/Thumb2/mve-abs.ll
index 0b5dcbced1a5..8a9b8814ef2e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-abs.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-abs.ll
@@ -40,33 +40,24 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
; CHECK-LABEL: abs_v2i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: vmov r1, s0
-; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: vmov r0, s1
-; CHECK-NEXT: rsbs.w lr, r1, #0
-; CHECK-NEXT: sbc.w r2, r12, r0
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: cset r3, mi
-; CHECK-NEXT: ands r3, r3, #1
-; CHECK-NEXT: csel r1, lr, r1, ne
-; CHECK-NEXT: csel r0, r2, r0, ne
-; CHECK-NEXT: vmov.32 q1[0], r1
-; CHECK-NEXT: vmov r1, s2
-; CHECK-NEXT: vmov.32 q1[1], r0
+; CHECK-NEXT: vmov r1, s0
+; CHECK-NEXT: adds.w r1, r1, r0, asr #31
+; CHECK-NEXT: adc.w r2, r0, r0, asr #31
+; CHECK-NEXT: eor.w r2, r2, r0, asr #31
+; CHECK-NEXT: eor.w r0, r1, r0, asr #31
+; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov r0, s3
-; CHECK-NEXT: rsbs r2, r1, #0
-; CHECK-NEXT: sbc.w r12, r12, r0
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: cset r3, mi
-; CHECK-NEXT: ands r3, r3, #1
-; CHECK-NEXT: csel r1, r2, r1, ne
-; CHECK-NEXT: csel r0, r12, r0, ne
+; CHECK-NEXT: vmov r1, s2
+; CHECK-NEXT: vmov.32 q1[1], r2
+; CHECK-NEXT: adds.w r1, r1, r0, asr #31
+; CHECK-NEXT: eor.w r1, r1, r0, asr #31
; CHECK-NEXT: vmov.32 q1[2], r1
+; CHECK-NEXT: adc.w r1, r0, r0, asr #31
+; CHECK-NEXT: eor.w r0, r1, r0, asr #31
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vmov q0, q1
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: bx lr
entry:
%0 = icmp slt <2 x i64> %s1, zeroinitializer
%1 = sub nsw <2 x i64> zeroinitializer, %s1
diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll
index 63faafc10ec8..8e20b001cc3e 100644
--- a/llvm/test/CodeGen/X86/abs.ll
+++ b/llvm/test/CodeGen/X86/abs.ll
@@ -144,35 +144,31 @@ define i128 @test_i128(i128 %a) nounwind {
;
; X86-LABEL: test_i128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: addl %edx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: negl %edi
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %eax, %esi
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: cmovnsl %eax, %esi
-; X86-NEXT: cmovnsl %ecx, %ebp
-; X86-NEXT: cmovnsl %edx, %ebx
-; X86-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, (%eax)
-; X86-NEXT: movl %ebx, 4(%eax)
-; X86-NEXT: movl %ebp, 8(%eax)
-; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: adcl %edx, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: adcl %edx, %ebx
+; X86-NEXT: adcl %edx, %ecx
+; X86-NEXT: xorl %edx, %ecx
+; X86-NEXT: xorl %edx, %ebx
+; X86-NEXT: xorl %edx, %edi
+; X86-NEXT: xorl %edx, %esi
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
+; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%r = call i128 @llvm.abs.i128(i128 %a, i1 false)
ret i128 %r
diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll
index f052718d9840..319eb6f5edc3 100644
--- a/llvm/test/CodeGen/X86/iabs.ll
+++ b/llvm/test/CodeGen/X86/iabs.ll
@@ -121,73 +121,34 @@ define i64 @test_i64(i64 %a) nounwind {
}
define i128 @test_i128(i128 %a) nounwind {
-; X86-NO-CMOV-LABEL: test_i128:
-; X86-NO-CMOV: # %bb.0:
-; X86-NO-CMOV-NEXT: pushl %ebp
-; X86-NO-CMOV-NEXT: pushl %ebx
-; X86-NO-CMOV-NEXT: pushl %edi
-; X86-NO-CMOV-NEXT: pushl %esi
-; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NO-CMOV-NEXT: xorl %ecx, %ecx
-; X86-NO-CMOV-NEXT: negl %ebp
-; X86-NO-CMOV-NEXT: movl $0, %ebx
-; X86-NO-CMOV-NEXT: sbbl %edx, %ebx
-; X86-NO-CMOV-NEXT: movl $0, %edi
-; X86-NO-CMOV-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NO-CMOV-NEXT: sbbl %esi, %ecx
-; X86-NO-CMOV-NEXT: testl %esi, %esi
-; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NO-CMOV-NEXT: js .LBB4_2
-; X86-NO-CMOV-NEXT: # %bb.1:
-; X86-NO-CMOV-NEXT: movl %esi, %ecx
-; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NO-CMOV-NEXT: movl %edx, %ebx
-; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NO-CMOV-NEXT: .LBB4_2:
-; X86-NO-CMOV-NEXT: movl %ebp, (%eax)
-; X86-NO-CMOV-NEXT: movl %ebx, 4(%eax)
-; X86-NO-CMOV-NEXT: movl %edi, 8(%eax)
-; X86-NO-CMOV-NEXT: movl %ecx, 12(%eax)
-; X86-NO-CMOV-NEXT: popl %esi
-; X86-NO-CMOV-NEXT: popl %edi
-; X86-NO-CMOV-NEXT: popl %ebx
-; X86-NO-CMOV-NEXT: popl %ebp
-; X86-NO-CMOV-NEXT: retl $4
-;
-; X86-CMOV-LABEL: test_i128:
-; X86-CMOV: # %bb.0:
-; X86-CMOV-NEXT: pushl %ebp
-; X86-CMOV-NEXT: pushl %ebx
-; X86-CMOV-NEXT: pushl %edi
-; X86-CMOV-NEXT: pushl %esi
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-CMOV-NEXT: xorl %esi, %esi
-; X86-CMOV-NEXT: negl %edi
-; X86-CMOV-NEXT: movl $0, %ebx
-; X86-CMOV-NEXT: sbbl %edx, %ebx
-; X86-CMOV-NEXT: movl $0, %ebp
-; X86-CMOV-NEXT: sbbl %ecx, %ebp
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-CMOV-NEXT: sbbl %eax, %esi
-; X86-CMOV-NEXT: testl %eax, %eax
-; X86-CMOV-NEXT: cmovnsl %eax, %esi
-; X86-CMOV-NEXT: cmovnsl %ecx, %ebp
-; X86-CMOV-NEXT: cmovnsl %edx, %ebx
-; X86-CMOV-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-CMOV-NEXT: movl %edi, (%eax)
-; X86-CMOV-NEXT: movl %ebx, 4(%eax)
-; X86-CMOV-NEXT: movl %ebp, 8(%eax)
-; X86-CMOV-NEXT: movl %esi, 12(%eax)
-; X86-CMOV-NEXT: popl %esi
-; X86-CMOV-NEXT: popl %edi
-; X86-CMOV-NEXT: popl %ebx
-; X86-CMOV-NEXT: popl %ebp
-; X86-CMOV-NEXT: retl $4
+; X86-LABEL: test_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: adcl %edx, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: adcl %edx, %ebx
+; X86-NEXT: adcl %edx, %ecx
+; X86-NEXT: xorl %edx, %ecx
+; X86-NEXT: xorl %edx, %ebx
+; X86-NEXT: xorl %edx, %edi
+; X86-NEXT: xorl %edx, %esi
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl $4
;
; X64-LABEL: test_i128:
; X64: # %bb.0:
More information about the llvm-commits mailing list