[llvm] 0121b1a - Revert "[TargetLowering][RISCV][X86] Support even divisors in expandDIVREMByConstant."
Craig Topper via llvm-commits
llvm-commits@lists.llvm.org
Mon Oct 10 14:53:42 PDT 2022
Author: Craig Topper
Date: 2022-10-10T14:53:29-07:00
New Revision: 0121b1a4ac8d521dd6c2465bf4541434d6c5f940
URL: https://github.com/llvm/llvm-project/commit/0121b1a4ac8d521dd6c2465bf4541434d6c5f940
DIFF: https://github.com/llvm/llvm-project/commit/0121b1a4ac8d521dd6c2465bf4541434d6c5f940.diff
LOG: Revert "[TargetLowering][RISCV][X86] Support even divisors in expandDIVREMByConstant."
This reverts commit d4facda414b6b9b8b1a34bc7e6b7c15172775318.
This has been reported to cause failures. Reverting while I investigate.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll
llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
llvm/test/CodeGen/X86/divide-by-constant.ll
llvm/test/CodeGen/X86/divmod128.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9dc84a6b4dbbd..a3070fe31c47e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7168,15 +7168,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
// Remainder = Sum % Constant
// This is based on "Remainder by Summing Digits" from Hacker's Delight.
//
-// If Constant is even, we can shift right the dividend and the divisor by the
-// number of trailing zeros in Constant before computing the remainder. Then
-// fixup the remainder by shifting it left by the number of trailing zeros and
-// adding the bits that were shifted out of the dividend.
-//
-// For division, we can compute the remainder using the algorithm described
-// above, subtract it from the dividend to get an exact multiple of Constant.
-// Then multiply that exact multiple by the multiplicative inverse modulo
-// (1 << (BitWidth / 2)).
+// For division, we can compute the remainder, subtract it from the dividend,
+// and then multiply by the multiplicative inverse modulo (1 << (BitWidth / 2)).
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
SmallVectorImpl<SDValue> &Result,
EVT HiLoVT, SelectionDAG &DAG,
@@ -7195,7 +7188,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
if (!CN)
return false;
- APInt Divisor = CN->getAPIntValue();
+ const APInt &Divisor = CN->getAPIntValue();
unsigned BitWidth = Divisor.getBitWidth();
unsigned HBitWidth = BitWidth / 2;
assert(VT.getScalarSizeInBits() == BitWidth &&
@@ -7216,17 +7209,10 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
if (DAG.shouldOptForSize())
return false;
- // Early out for 0 or 1 divisors.
- if (Divisor.ule(1))
+ // Early out for 0, 1 or even divisors.
+ if (Divisor.ule(1) || Divisor[0] == 0)
return false;
- // If the divisor is even, shift it until it becomes odd.
- unsigned TrailingZeros = 0;
- if (!Divisor[0]) {
- TrailingZeros = Divisor.countTrailingZeros();
- Divisor.lshrInPlace(TrailingZeros);
- }
-
SDLoc dl(N);
SDValue Sum;
@@ -7243,35 +7229,17 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
DAG.getIntPtrConstant(1, dl));
}
- SDValue ShiftedLL = LL;
- SDValue ShiftedLH = LH;
-
- // Shift the input by the number of TrailingZeros in the divisor. The
- // shifted out bits will be added to the remainder later.
- if (TrailingZeros) {
- ShiftedLL = DAG.getNode(
- ISD::OR, dl, HiLoVT,
- DAG.getNode(ISD::SRL, dl, HiLoVT, ShiftedLL,
- DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
- DAG.getNode(ISD::SHL, dl, HiLoVT, ShiftedLH,
- DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
- HiLoVT, dl)));
- ShiftedLH =
- DAG.getNode(ISD::SRL, dl, HiLoVT, ShiftedLH,
- DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
- }
-
// Use addcarry if we can, otherwise use a compare to detect overflow.
EVT SetCCType =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
if (isOperationLegalOrCustom(ISD::ADDCARRY, HiLoVT)) {
SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
- Sum = DAG.getNode(ISD::UADDO, dl, VTList, ShiftedLL, ShiftedLH);
+ Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
Sum = DAG.getNode(ISD::ADDCARRY, dl, VTList, Sum,
DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
} else {
- Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, ShiftedLL, ShiftedLH);
- SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, ShiftedLL, ISD::SETULT);
+ Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
+ SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
// If the boolean for the target is 0 or 1, we can add the setcc result
// directly.
if (getBooleanContents(HiLoVT) ==
@@ -7295,17 +7263,6 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
// High half of the remainder is 0.
SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
- // If we shifted the input, shift the remainder left and add the bits we
- // shifted off the input.
- if (TrailingZeros) {
- APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
- RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
- DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
- RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL,
- DAG.getNode(ISD::AND, dl, HiLoVT, LL,
- DAG.getConstant(Mask, dl, HiLoVT)));
- }
-
// If we only want remainder, we're done.
if (Opcode == ISD::UREM) {
Result.push_back(RemL);
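For reference, the part of the expansion this revert keeps works as follows: split the wide dividend into two halves, fold the high half into the low half with an end-around carry, and take one narrow remainder. A minimal standalone sketch of the 64-into-32 case (plain C++ with illustrative names, not the DAG code; it assumes a divisor D with 2^32 mod D == 1, i.e. D divides 2^32 - 1, which holds for the divisors the tests keep: 3, 5, 257 and 65537):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Remainder of a 64-bit value by a constant D with 2^32 % D == 1, using
// only 32-bit adds plus one narrow remainder ("Remainder by Summing
// Digits", Hacker's Delight).
static uint32_t RemBySummingDigits(uint64_t N, uint32_t D) {
  uint32_t Lo = static_cast<uint32_t>(N);
  uint32_t Hi = static_cast<uint32_t>(N >> 32);
  // N = Hi * 2^32 + Lo == Hi + Lo (mod D), since 2^32 == 1 (mod D).
  uint32_t Sum = Lo + Hi;
  // If the add wrapped, it dropped 2^32, which is 1 (mod D); add it back.
  // This mirrors the UADDO + ADDCARRY pair (or the SETULT compare on
  // targets without legal ADDCARRY) in expandDIVREMByConstant.
  Sum += (Sum < Lo) ? 1 : 0;
  // The narrow urem is then expanded further, into the multiply-by-magic-
  // constant sequences visible in the test diffs below.
  return Sum % D;
}

int main() {
  for (uint64_t N : {0ULL, 11ULL, 0xDEADBEEFCAFEF00DULL, ~0ULL})
    for (uint32_t D : {3u, 5u, 257u, 65537u})
      assert(RemBySummingDigits(N, D) == N % D);
  puts("ok");
}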
diff --git a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll
index fd75ebc943274..dbfea8a03212a 100644
--- a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll
@@ -502,65 +502,24 @@ define iXLen2 @test_udiv_65537(iXLen2 %x) nounwind {
define iXLen2 @test_udiv_12(iXLen2 %x) nounwind {
; RV32-LABEL: test_udiv_12:
; RV32: # %bb.0:
-; RV32-NEXT: slli a2, a1, 30
-; RV32-NEXT: srli a3, a0, 2
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: srli a3, a1, 2
-; RV32-NEXT: add a3, a2, a3
-; RV32-NEXT: sltu a2, a3, a2
-; RV32-NEXT: add a2, a3, a2
-; RV32-NEXT: lui a3, 699051
-; RV32-NEXT: addi a4, a3, -1365
-; RV32-NEXT: mulhu a5, a2, a4
-; RV32-NEXT: srli a6, a5, 1
-; RV32-NEXT: andi a5, a5, -2
-; RV32-NEXT: add a5, a5, a6
-; RV32-NEXT: sub a2, a2, a5
-; RV32-NEXT: slli a2, a2, 2
-; RV32-NEXT: andi a5, a0, 3
-; RV32-NEXT: or a2, a2, a5
-; RV32-NEXT: sub a5, a0, a2
-; RV32-NEXT: addi a3, a3, -1366
-; RV32-NEXT: mul a3, a5, a3
-; RV32-NEXT: mulhu a6, a5, a4
-; RV32-NEXT: add a3, a6, a3
-; RV32-NEXT: sltu a0, a0, a2
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: mul a0, a0, a4
-; RV32-NEXT: add a1, a3, a0
-; RV32-NEXT: mul a0, a5, a4
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a2, 12
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: call __udivdi3@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_udiv_12:
; RV64: # %bb.0:
-; RV64-NEXT: slli a2, a1, 62
-; RV64-NEXT: srli a3, a0, 2
-; RV64-NEXT: or a2, a3, a2
-; RV64-NEXT: srli a3, a1, 2
-; RV64-NEXT: lui a4, %hi(.LCPI10_0)
-; RV64-NEXT: ld a4, %lo(.LCPI10_0)(a4)
-; RV64-NEXT: add a3, a2, a3
-; RV64-NEXT: sltu a2, a3, a2
-; RV64-NEXT: add a2, a3, a2
-; RV64-NEXT: mulhu a3, a2, a4
-; RV64-NEXT: srli a5, a3, 1
-; RV64-NEXT: andi a3, a3, -2
-; RV64-NEXT: add a3, a3, a5
-; RV64-NEXT: sub a2, a2, a3
-; RV64-NEXT: slli a2, a2, 2
-; RV64-NEXT: lui a3, %hi(.LCPI10_1)
-; RV64-NEXT: ld a3, %lo(.LCPI10_1)(a3)
-; RV64-NEXT: andi a5, a0, 3
-; RV64-NEXT: or a2, a2, a5
-; RV64-NEXT: sub a5, a0, a2
-; RV64-NEXT: mul a3, a5, a3
-; RV64-NEXT: mulhu a6, a5, a4
-; RV64-NEXT: add a3, a6, a3
-; RV64-NEXT: sltu a0, a0, a2
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: mul a0, a0, a4
-; RV64-NEXT: add a1, a3, a0
-; RV64-NEXT: mul a0, a5, a4
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: li a2, 12
+; RV64-NEXT: li a3, 0
+; RV64-NEXT: call __udivti3@plt
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%a = udiv iXLen2 %x, 12
ret iXLen2 %a
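The division half of the updated comment follows from the remainder: N - (N % D) is an exact multiple of D, and dividing an exact multiple only takes a multiply by D's multiplicative inverse mod 2^64. Such an inverse exists only for odd D, which is why the reverted patch had to shift even divisors like 12 down to an odd value first, and why the even cases above now fall back to the __udivdi3/__udivti3 libcalls. A hedged standalone sketch (plain C++; the helper names are illustrative, not LLVM APIs):

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t InverseMod2Pow64(uint64_t D) {
  assert((D & 1) && "only odd values are invertible mod 2^64");
  uint64_t Inv = D; // correct to 3 bits: D * D == 1 (mod 8) for odd D
  for (int I = 0; I < 5; ++I)
    Inv *= 2 - D * Inv; // each Newton step doubles the correct low bits
  return Inv;
}

static uint64_t UDivByOddConstant(uint64_t N, uint64_t D) {
  // N - N % D is an exact multiple of D; multiplying an exact multiple
  // by D's inverse mod 2^64 recovers the quotient exactly.
  uint64_t Rem = N % D; // stands in for the summing-digits remainder
  return (N - Rem) * InverseMod2Pow64(D);
}

int main() {
  for (uint64_t N : {0ULL, 12ULL, 0x123456789ABCDEFULL, ~0ULL})
    for (uint64_t D : {3ULL, 5ULL, 257ULL, 65537ULL})
      assert(UDivByOddConstant(N, D) == N / D);
  puts("ok");
}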
diff --git a/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
index 7a52de8f066e7..c0e210a7ec46e 100644
--- a/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
@@ -335,46 +335,24 @@ define iXLen2 @test_urem_65537(iXLen2 %x) nounwind {
define iXLen2 @test_urem_12(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_12:
; RV32: # %bb.0:
-; RV32-NEXT: slli a2, a1, 30
-; RV32-NEXT: srli a3, a0, 2
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: srli a1, a1, 2
-; RV32-NEXT: add a1, a2, a1
-; RV32-NEXT: sltu a2, a1, a2
-; RV32-NEXT: add a1, a1, a2
-; RV32-NEXT: lui a2, 699051
-; RV32-NEXT: addi a2, a2, -1365
-; RV32-NEXT: mulhu a2, a1, a2
-; RV32-NEXT: srli a3, a2, 1
-; RV32-NEXT: andi a2, a2, -2
-; RV32-NEXT: add a2, a2, a3
-; RV32-NEXT: sub a1, a1, a2
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: andi a0, a0, 3
-; RV32-NEXT: or a0, a1, a0
-; RV32-NEXT: li a1, 0
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a2, 12
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: call __umoddi3@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_12:
; RV64: # %bb.0:
-; RV64-NEXT: slli a2, a1, 62
-; RV64-NEXT: srli a3, a0, 2
-; RV64-NEXT: or a2, a3, a2
-; RV64-NEXT: srli a1, a1, 2
-; RV64-NEXT: lui a3, %hi(.LCPI10_0)
-; RV64-NEXT: ld a3, %lo(.LCPI10_0)(a3)
-; RV64-NEXT: add a1, a2, a1
-; RV64-NEXT: sltu a2, a1, a2
-; RV64-NEXT: add a1, a1, a2
-; RV64-NEXT: mulhu a2, a1, a3
-; RV64-NEXT: srli a3, a2, 1
-; RV64-NEXT: andi a2, a2, -2
-; RV64-NEXT: add a2, a2, a3
-; RV64-NEXT: sub a1, a1, a2
-; RV64-NEXT: slli a1, a1, 2
-; RV64-NEXT: andi a0, a0, 3
-; RV64-NEXT: or a0, a1, a0
-; RV64-NEXT: li a1, 0
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: li a2, 12
+; RV64-NEXT: li a3, 0
+; RV64-NEXT: call __umodti3@plt
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%a = urem iXLen2 %x, 12
ret iXLen2 %a
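For reference, the remainder fixup this revert removes: with an even divisor D = Odd << K, the low K bits of the dividend survive unchanged into the remainder, so only the shifted-down value needs the odd-divisor expansion. A standalone sketch (plain C++, using the GCC/Clang __builtin_ctzll builtin; illustrative names, not the DAG code):

#include <cassert>
#include <cstdint>
#include <cstdio>

// With D == Odd << K, the low K bits of N pass straight through, so
//   N % D == (((N >> K) % Odd) << K) | (N & ((1 << K) - 1)).
// For D == 12 this is K == 2, Odd == 3, matching the slli/srli/andi 3
// sequences removed from the tests above.
static uint64_t URemByEvenConstant(uint64_t N, uint64_t D) {
  unsigned K = __builtin_ctzll(D);  // trailing zeros of the divisor
  uint64_t Odd = D >> K;
  uint64_t RemOdd = (N >> K) % Odd; // stands in for the odd-divisor expansion
  return (RemOdd << K) | (N & ((1ULL << K) - 1));
}

int main() {
  for (uint64_t N : {0ULL, 11ULL, 0xFEEDFACE12345678ULL, ~0ULL})
    assert(URemByEvenConstant(N, 12) == N % 12);
  puts("ok");
}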
diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll
index 2f8a19b36b623..8f6d3ddb60e41 100644
--- a/llvm/test/CodeGen/X86/divide-by-constant.ll
+++ b/llvm/test/CodeGen/X86/divide-by-constant.ll
@@ -735,24 +735,13 @@ entry:
define i64 @urem_i64_12(i64 %x) nounwind {
; X32-LABEL: urem_i64_12:
; X32: # %bb.0: # %entry
-; X32-NEXT: pushl %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: shrl $2, %eax
-; X32-NEXT: shldl $30, %esi, %ecx
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edx
-; X32-NEXT: shrl %edx
-; X32-NEXT: leal (%edx,%edx,2), %eax
-; X32-NEXT: subl %eax, %ecx
-; X32-NEXT: andl $3, %esi
-; X32-NEXT: leal (%esi,%ecx,4), %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: popl %esi
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $12
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll __umoddi3
+; X32-NEXT: addl $28, %esp
; X32-NEXT: retl
;
; X64-LABEL: urem_i64_12:
@@ -1127,37 +1116,13 @@ entry:
define i64 @udiv_i64_12(i64 %x) nounwind {
; X32-LABEL: udiv_i64_12:
; X32: # %bb.0: # %entry
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %edi
-; X32-NEXT: pushl %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: shrl $2, %eax
-; X32-NEXT: movl %edi, %esi
-; X32-NEXT: shldl $30, %ecx, %esi
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl $-1431655765, %ebx # imm = 0xAAAAAAAB
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: shrl %edx
-; X32-NEXT: leal (%edx,%edx,2), %eax
-; X32-NEXT: subl %eax, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: andl $3, %eax
-; X32-NEXT: leal (%eax,%esi,4), %eax
-; X32-NEXT: subl %eax, %ecx
-; X32-NEXT: sbbl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: imull $-1431655766, %ecx, %ecx # imm = 0xAAAAAAAA
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull $-1431655765, %edi, %ecx # imm = 0xAAAAAAAB
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %edi
-; X32-NEXT: popl %ebx
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $12
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll __udivdi3
+; X32-NEXT: addl $28, %esp
; X32-NEXT: retl
;
; X64-LABEL: udiv_i64_12:
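A note on the 0xAAAAAAAB multiplier in the removed sequences (and its 64-bit sibling 0xAAAAAAAAAAAAAAAB): it is the standard magic constant for unsigned division by 3, the odd part of 12, so no hardware divide is needed. A quick standalone check (plain C++):

#include <cassert>
#include <cstdint>
#include <cstdio>

// 0xAAAAAAAB == (2^33 + 1) / 3, and for any 32-bit n,
//   n / 3 == (n * 0xAAAAAAAB) >> 33,
// which is the mull/mulhu followed by a 1-bit shift in the old output.
static uint32_t UDiv3(uint32_t N) {
  return static_cast<uint32_t>((N * 0xAAAAAAABULL) >> 33);
}

int main() {
  for (uint64_t N = 0; N <= UINT32_MAX; N += 9973) // sparse sweep
    assert(UDiv3(static_cast<uint32_t>(N)) == N / 3);
  assert(UDiv3(UINT32_MAX) == UINT32_MAX / 3);
  puts("ok");
}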
diff --git a/llvm/test/CodeGen/X86/divmod128.ll b/llvm/test/CodeGen/X86/divmod128.ll
index cb234263384a1..4549598ca3ed9 100644
--- a/llvm/test/CodeGen/X86/divmod128.ll
+++ b/llvm/test/CodeGen/X86/divmod128.ll
@@ -425,39 +425,27 @@ entry:
define i128 @urem_i128_12(i128 %x) nounwind {
; X86-64-LABEL: urem_i128_12:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: movq %rsi, %rax
-; X86-64-NEXT: shldq $62, %rdi, %rax
-; X86-64-NEXT: shrq $2, %rsi
-; X86-64-NEXT: addq %rax, %rsi
-; X86-64-NEXT: adcq $0, %rsi
-; X86-64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
-; X86-64-NEXT: movq %rsi, %rax
-; X86-64-NEXT: mulq %rcx
-; X86-64-NEXT: shrq %rdx
-; X86-64-NEXT: leaq (%rdx,%rdx,2), %rax
-; X86-64-NEXT: subq %rax, %rsi
-; X86-64-NEXT: andl $3, %edi
-; X86-64-NEXT: leaq (%rdi,%rsi,4), %rax
-; X86-64-NEXT: xorl %edx, %edx
+; X86-64-NEXT: pushq %rax
+; X86-64-NEXT: movl $12, %edx
+; X86-64-NEXT: xorl %ecx, %ecx
+; X86-64-NEXT: callq __umodti3@PLT
+; X86-64-NEXT: popq %rcx
; X86-64-NEXT: retq
;
; WIN64-LABEL: urem_i128_12:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: movq %rdx, %r8
-; WIN64-NEXT: movq %rdx, %rax
-; WIN64-NEXT: shldq $62, %rcx, %rax
-; WIN64-NEXT: shrq $2, %r8
-; WIN64-NEXT: addq %rax, %r8
-; WIN64-NEXT: adcq $0, %r8
-; WIN64-NEXT: movabsq $-6148914691236517205, %rdx # imm = 0xAAAAAAAAAAAAAAAB
-; WIN64-NEXT: movq %r8, %rax
-; WIN64-NEXT: mulq %rdx
-; WIN64-NEXT: shrq %rdx
-; WIN64-NEXT: leaq (%rdx,%rdx,2), %rax
-; WIN64-NEXT: subq %rax, %r8
-; WIN64-NEXT: andl $3, %ecx
-; WIN64-NEXT: leaq (%rcx,%r8,4), %rax
-; WIN64-NEXT: xorl %edx, %edx
+; WIN64-NEXT: subq $72, %rsp
+; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movq $12, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-NEXT: callq __umodti3
+; WIN64-NEXT: movq %xmm0, %rax
+; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; WIN64-NEXT: movq %xmm0, %rdx
+; WIN64-NEXT: addq $72, %rsp
; WIN64-NEXT: retq
entry:
%rem = urem i128 %x, 12
@@ -899,59 +887,27 @@ entry:
define i128 @udiv_i128_12(i128 %x) nounwind {
; X86-64-LABEL: udiv_i128_12:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: movq %rsi, %rax
-; X86-64-NEXT: shldq $62, %rdi, %rax
-; X86-64-NEXT: movq %rsi, %rcx
-; X86-64-NEXT: shrq $2, %rcx
-; X86-64-NEXT: addq %rax, %rcx
-; X86-64-NEXT: adcq $0, %rcx
-; X86-64-NEXT: movabsq $-6148914691236517205, %r8 # imm = 0xAAAAAAAAAAAAAAAB
-; X86-64-NEXT: movq %rcx, %rax
-; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: shrq %rdx
-; X86-64-NEXT: leaq (%rdx,%rdx,2), %rax
-; X86-64-NEXT: subq %rax, %rcx
-; X86-64-NEXT: movl %edi, %eax
-; X86-64-NEXT: andl $3, %eax
-; X86-64-NEXT: leaq (%rax,%rcx,4), %rax
-; X86-64-NEXT: subq %rax, %rdi
-; X86-64-NEXT: sbbq $0, %rsi
-; X86-64-NEXT: movabsq $-6148914691236517206, %rcx # imm = 0xAAAAAAAAAAAAAAAA
-; X86-64-NEXT: imulq %rdi, %rcx
-; X86-64-NEXT: movq %rdi, %rax
-; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: addq %rcx, %rdx
-; X86-64-NEXT: imulq %rsi, %r8
-; X86-64-NEXT: addq %r8, %rdx
+; X86-64-NEXT: pushq %rax
+; X86-64-NEXT: movl $12, %edx
+; X86-64-NEXT: xorl %ecx, %ecx
+; X86-64-NEXT: callq __udivti3@PLT
+; X86-64-NEXT: popq %rcx
; X86-64-NEXT: retq
;
; WIN64-LABEL: udiv_i128_12:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: movq %rdx, %r8
-; WIN64-NEXT: movq %rdx, %rax
-; WIN64-NEXT: shldq $62, %rcx, %rax
-; WIN64-NEXT: movq %rdx, %r9
-; WIN64-NEXT: shrq $2, %r9
-; WIN64-NEXT: addq %rax, %r9
-; WIN64-NEXT: adcq $0, %r9
-; WIN64-NEXT: movabsq $-6148914691236517205, %r10 # imm = 0xAAAAAAAAAAAAAAAB
-; WIN64-NEXT: movq %r9, %rax
-; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: shrq %rdx
-; WIN64-NEXT: leaq (%rdx,%rdx,2), %rax
-; WIN64-NEXT: subq %rax, %r9
-; WIN64-NEXT: movl %ecx, %eax
-; WIN64-NEXT: andl $3, %eax
-; WIN64-NEXT: leaq (%rax,%r9,4), %rax
-; WIN64-NEXT: subq %rax, %rcx
-; WIN64-NEXT: sbbq $0, %r8
-; WIN64-NEXT: movabsq $-6148914691236517206, %r9 # imm = 0xAAAAAAAAAAAAAAAA
-; WIN64-NEXT: imulq %rcx, %r9
-; WIN64-NEXT: movq %rcx, %rax
-; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: addq %r9, %rdx
-; WIN64-NEXT: imulq %r10, %r8
-; WIN64-NEXT: addq %r8, %rdx
+; WIN64-NEXT: subq $72, %rsp
+; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movq $12, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-NEXT: callq __udivti3
+; WIN64-NEXT: movq %xmm0, %rax
+; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; WIN64-NEXT: movq %xmm0, %rdx
+; WIN64-NEXT: addq $72, %rsp
; WIN64-NEXT: retq
entry:
%rem = udiv i128 %x, 12