[llvm] 65aaecc - Revert "[TargetLowering][RISCV][X86] Support even divisors in expandDIVREMByConstant."
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 24 07:13:18 PDT 2022
Author: Craig Topper
Date: 2022-10-24T07:12:54-07:00
New Revision: 65aaecca8842dec30d03734a7fe8ce33c5afec81
URL: https://github.com/llvm/llvm-project/commit/65aaecca8842dec30d03734a7fe8ce33c5afec81
DIFF: https://github.com/llvm/llvm-project/commit/65aaecca8842dec30d03734a7fe8ce33c5afec81.diff
LOG: Revert "[TargetLowering][RISCV][X86] Support even divisors in expandDIVREMByConstant."
This reverts commit f6a7b47820904c5e69cc4f133d382c74a87c44e8.
I received a report that this fails on 32-bit X86.
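For context, the reverted change reduced an even divisor to its odd part before applying the existing expansion: with d = d_odd << tz (tz trailing zero bits), x udiv d equals (x >> tz) udiv d_odd, and x urem d is the narrow remainder shifted back up with the tz bits shifted off the dividend reattached. A minimal standalone sketch of that identity on 64-bit integers, using plain division in place of the SelectionDAG expansion (helper names are illustrative, not LLVM APIs):

#include <cassert>
#include <cstdint>

struct DivRem { uint64_t Quot, Rem; };

// Divide X by an even constant D by shifting both down to D's odd part first.
DivRem udivremByEvenConst(uint64_t X, uint64_t D) {
  assert(D != 0 && (D & 1) == 0 && "expects an even divisor");
  unsigned TZ = __builtin_ctzll(D);            // trailing zero bits of D
  uint64_t OddD = D >> TZ;                     // odd part of the divisor
  uint64_t Shifted = X >> TZ;                  // dividend without its low TZ bits
  uint64_t LowBits = X & ((1ull << TZ) - 1);   // the bits shifted off the dividend
  uint64_t Quot = Shifted / OddD;              // quotient is unchanged by the shift
  uint64_t Rem = ((Shifted % OddD) << TZ) | LowBits; // reattach the shifted-off bits
  return {Quot, Rem};
}

For D = 12 (TZ = 2, OddD = 3) and X = 100 this returns {8, 4}, matching 100 udiv 12 and 100 urem 12.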
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll
llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
llvm/test/CodeGen/X86/divide-by-constant.ll
llvm/test/CodeGen/X86/divmod128.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 33631f290e284..2aa8c04f43251 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7168,17 +7168,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
// Remainder = Sum % Constant
// This is based on "Remainder by Summing Digits" from Hacker's Delight.
//
-// For division, we can compute the remainder using the algorithm described
-// above, subtract it from the dividend to get an exact multiple of Constant.
-// Then multiply that exact multiple by the multiplicative inverse modulo
-// (1 << (BitWidth / 2)) to get the quotient.
-
-// If Constant is even, we can shift right the dividend and the divisor by the
-// number of trailing zeros in Constant before applying the remainder algorithm.
-// If we're after the quotient, we can subtract this value from the shifted
-// dividend and multiply by the multiplicative inverse of the shifted divisor.
-// If we want the remainder, we shift the value left by the number of trailing
-// zeros and add the bits that were shifted out of the dividend.
+// For division, we can compute the remainder, subtract it from the dividend,
+// and then multiply by the multiplicative inverse modulo (1 << (BitWidth / 2)).
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
SmallVectorImpl<SDValue> &Result,
EVT HiLoVT, SelectionDAG &DAG,
@@ -7197,7 +7188,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
if (!CN)
return false;
- APInt Divisor = CN->getAPIntValue();
+ const APInt &Divisor = CN->getAPIntValue();
unsigned BitWidth = Divisor.getBitWidth();
unsigned HBitWidth = BitWidth / 2;
assert(VT.getScalarSizeInBits() == BitWidth &&
@@ -7218,20 +7209,12 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
if (DAG.shouldOptForSize())
return false;
- // Early out for 0 or 1 divisors.
- if (Divisor.ule(1))
+ // Early out for 0, 1 or even divisors.
+ if (Divisor.ule(1) || Divisor[0] == 0)
return false;
- // If the divisor is even, shift it until it becomes odd.
- unsigned TrailingZeros = 0;
- if (!Divisor[0]) {
- TrailingZeros = Divisor.countTrailingZeros();
- Divisor.lshrInPlace(TrailingZeros);
- }
-
SDLoc dl(N);
SDValue Sum;
- SDValue PartialRem;
// If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
// then add in the carry.
@@ -7246,27 +7229,6 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
DAG.getIntPtrConstant(1, dl));
}
- // Shift the input by the number of TrailingZeros in the divisor. The
- // shifted out bits will be added to the remainder later.
- if (TrailingZeros) {
- LL = DAG.getNode(
- ISD::OR, dl, HiLoVT,
- DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
- DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
- DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
- DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
- HiLoVT, dl)));
- LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
- DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
-
- // Save the shifted off bits if we need the remainder.
- if (Opcode != ISD::UDIV) {
- APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
- PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
- DAG.getConstant(Mask, dl, HiLoVT));
- }
- }
-
// Use addcarry if we can, otherwise use a compare to detect overflow.
EVT SetCCType =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
@@ -7298,45 +7260,45 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
SDValue RemL =
DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
+ // High half of the remainder is 0.
SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
- if (Opcode != ISD::UREM) {
- // Subtract the remainder from the shifted dividend.
- SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
- SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
-
- Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
-
- // Multiply by the multiplicative inverse of the divisor modulo
- // (1 << BitWidth).
- APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
- APInt MulFactor = Divisor.zext(BitWidth + 1);
- MulFactor = MulFactor.multiplicativeInverse(Mod);
- MulFactor = MulFactor.trunc(BitWidth);
-
- SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
- DAG.getConstant(MulFactor, dl, VT));
-
- // Split the quotient into low and high parts.
- SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
- DAG.getIntPtrConstant(0, dl));
- SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
- DAG.getIntPtrConstant(1, dl));
- Result.push_back(QuotL);
- Result.push_back(QuotH);
- }
-
- if (Opcode != ISD::UDIV) {
- // If we shifted the input, shift the remainder left and add the bits we
- // shifted off the input.
- if (TrailingZeros) {
- APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
- RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
- DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
- RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
- }
+ // If we only want remainder, we're done.
+ if (Opcode == ISD::UREM) {
+ Result.push_back(RemL);
+ Result.push_back(RemH);
+ return true;
+ }
+
+ // Otherwise, we need to compute the quotient.
+
+ // Join the remainder halves.
+ SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
+
+ // Subtract the remainder from the input.
+ SDValue In = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Rem);
+
+ // Multiply by the multiplicative inverse of the divisor modulo
+ // (1 << BitWidth).
+ APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
+ APInt MulFactor = Divisor.zext(BitWidth + 1);
+ MulFactor = MulFactor.multiplicativeInverse(Mod);
+ MulFactor = MulFactor.trunc(BitWidth);
+
+ SDValue Quotient =
+ DAG.getNode(ISD::MUL, dl, VT, In, DAG.getConstant(MulFactor, dl, VT));
+
+ // Split the quotient into low and high parts.
+ SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
+ DAG.getIntPtrConstant(0, dl));
+ SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
+ DAG.getIntPtrConstant(1, dl));
+ Result.push_back(QuotL);
+ Result.push_back(QuotH);
+ // For DIVREM, also return the remainder parts.
+ if (Opcode == ISD::UDIVREM) {
Result.push_back(RemL);
- Result.push_back(DAG.getConstant(0, dl, HiLoVT));
+ Result.push_back(RemH);
}
return true;
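After this revert, expandDIVREMByConstant keeps only the odd-divisor path described in the comments above: fold the two halves of the dividend together (a carry out of the add also represents (1 << HBitWidth), which is congruent to 1 modulo the divisor), take a narrow remainder, then recover the quotient by multiplying the exact multiple X - Rem by the divisor's multiplicative inverse. A rough standalone sketch for HBitWidth = 32, assuming an odd divisor D with (1 << 32) % D == 1; mulInverse is an illustrative stand-in for the APInt::multiplicativeInverse call in the real code:

#include <cassert>
#include <cstdint>

// Multiplicative inverse of an odd value modulo 2^64 via Newton-Raphson steps.
static uint64_t mulInverse(uint64_t D) {
  uint64_t Inv = D;              // correct to 3 low bits for any odd D
  for (int I = 0; I < 5; ++I)
    Inv *= 2 - D * Inv;          // each step doubles the number of correct bits
  return Inv;
}

struct DivRem { uint64_t Quot, Rem; };

DivRem udivremByOddConst(uint64_t X, uint32_t D) {
  assert(D > 1 && (D & 1) == 1 && (1ull << 32) % D == 1 && "unsupported divisor");
  uint32_t Lo = uint32_t(X), Hi = uint32_t(X >> 32);
  // "Remainder by Summing Digits": 2^32 == 1 (mod D), so
  // X == Hi * 2^32 + Lo == Hi + Lo (mod D).
  uint32_t Sum = Lo + Hi;
  Sum += (Sum < Lo);             // a carry out of the add also counts as 1 (mod D)
  uint64_t Rem = Sum % D;        // narrow remainder; the high half is 0
  // X - Rem is an exact multiple of D, so multiplying by D's inverse modulo
  // 2^64 yields the quotient without a wide division.
  uint64_t Quot = (X - Rem) * mulInverse(D);
  return {Quot, Rem};
}

For D = 3 and X = 100: Sum = 100, Rem = 1, and (100 - 1) * mulInverse(3) wraps to 33, matching 100 udiv 3.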
diff --git a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll
index cd2b7aa9f351d..dbfea8a03212a 100644
--- a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll
@@ -502,59 +502,24 @@ define iXLen2 @test_udiv_65537(iXLen2 %x) nounwind {
define iXLen2 @test_udiv_12(iXLen2 %x) nounwind {
; RV32-LABEL: test_udiv_12:
; RV32: # %bb.0:
-; RV32-NEXT: slli a2, a1, 30
-; RV32-NEXT: srli a0, a0, 2
-; RV32-NEXT: or a0, a0, a2
-; RV32-NEXT: srli a1, a1, 2
-; RV32-NEXT: add a2, a0, a1
-; RV32-NEXT: sltu a3, a2, a0
-; RV32-NEXT: add a2, a2, a3
-; RV32-NEXT: lui a3, 699051
-; RV32-NEXT: addi a4, a3, -1365
-; RV32-NEXT: mulhu a5, a2, a4
-; RV32-NEXT: srli a6, a5, 1
-; RV32-NEXT: andi a5, a5, -2
-; RV32-NEXT: add a5, a5, a6
-; RV32-NEXT: sub a2, a2, a5
-; RV32-NEXT: sub a5, a0, a2
-; RV32-NEXT: addi a3, a3, -1366
-; RV32-NEXT: mul a3, a5, a3
-; RV32-NEXT: mulhu a6, a5, a4
-; RV32-NEXT: add a3, a6, a3
-; RV32-NEXT: sltu a0, a0, a2
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: mul a0, a0, a4
-; RV32-NEXT: add a1, a3, a0
-; RV32-NEXT: mul a0, a5, a4
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a2, 12
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: call __udivdi3@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_udiv_12:
; RV64: # %bb.0:
-; RV64-NEXT: slli a2, a1, 62
-; RV64-NEXT: srli a0, a0, 2
-; RV64-NEXT: or a0, a0, a2
-; RV64-NEXT: srli a1, a1, 2
-; RV64-NEXT: lui a2, %hi(.LCPI10_0)
-; RV64-NEXT: ld a2, %lo(.LCPI10_0)(a2)
-; RV64-NEXT: add a3, a0, a1
-; RV64-NEXT: sltu a4, a3, a0
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: mulhu a4, a3, a2
-; RV64-NEXT: srli a5, a4, 1
-; RV64-NEXT: andi a4, a4, -2
-; RV64-NEXT: lui a6, %hi(.LCPI10_1)
-; RV64-NEXT: ld a6, %lo(.LCPI10_1)(a6)
-; RV64-NEXT: add a4, a4, a5
-; RV64-NEXT: sub a3, a3, a4
-; RV64-NEXT: sub a4, a0, a3
-; RV64-NEXT: mul a5, a4, a6
-; RV64-NEXT: mulhu a6, a4, a2
-; RV64-NEXT: add a5, a6, a5
-; RV64-NEXT: sltu a0, a0, a3
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: mul a0, a0, a2
-; RV64-NEXT: add a1, a5, a0
-; RV64-NEXT: mul a0, a4, a2
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: li a2, 12
+; RV64-NEXT: li a3, 0
+; RV64-NEXT: call __udivti3@plt
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%a = udiv iXLen2 %x, 12
ret iXLen2 %a
diff --git a/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
index aa235c48c56a9..c0e210a7ec46e 100644
--- a/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
@@ -335,46 +335,24 @@ define iXLen2 @test_urem_65537(iXLen2 %x) nounwind {
define iXLen2 @test_urem_12(iXLen2 %x) nounwind {
; RV32-LABEL: test_urem_12:
; RV32: # %bb.0:
-; RV32-NEXT: slli a2, a1, 30
-; RV32-NEXT: srli a0, a0, 2
-; RV32-NEXT: or a0, a0, a2
-; RV32-NEXT: srli a1, a1, 2
-; RV32-NEXT: add a1, a0, a1
-; RV32-NEXT: sltu a2, a1, a0
-; RV32-NEXT: add a1, a1, a2
-; RV32-NEXT: lui a2, 699051
-; RV32-NEXT: addi a2, a2, -1365
-; RV32-NEXT: mulhu a2, a1, a2
-; RV32-NEXT: srli a3, a2, 1
-; RV32-NEXT: andi a2, a2, -2
-; RV32-NEXT: add a2, a2, a3
-; RV32-NEXT: sub a1, a1, a2
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: andi a0, a0, 3
-; RV32-NEXT: or a0, a1, a0
-; RV32-NEXT: li a1, 0
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a2, 12
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: call __umoddi3@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_12:
; RV64: # %bb.0:
-; RV64-NEXT: slli a2, a1, 62
-; RV64-NEXT: srli a0, a0, 2
-; RV64-NEXT: or a0, a0, a2
-; RV64-NEXT: srli a1, a1, 2
-; RV64-NEXT: lui a2, %hi(.LCPI10_0)
-; RV64-NEXT: ld a2, %lo(.LCPI10_0)(a2)
-; RV64-NEXT: add a1, a0, a1
-; RV64-NEXT: sltu a3, a1, a0
-; RV64-NEXT: add a1, a1, a3
-; RV64-NEXT: mulhu a2, a1, a2
-; RV64-NEXT: srli a3, a2, 1
-; RV64-NEXT: andi a2, a2, -2
-; RV64-NEXT: add a2, a2, a3
-; RV64-NEXT: sub a1, a1, a2
-; RV64-NEXT: slli a1, a1, 2
-; RV64-NEXT: andi a0, a0, 3
-; RV64-NEXT: or a0, a1, a0
-; RV64-NEXT: li a1, 0
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: li a2, 12
+; RV64-NEXT: li a3, 0
+; RV64-NEXT: call __umodti3@plt
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%a = urem iXLen2 %x, 12
ret iXLen2 %a
diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll
index 9197dc0c1d64e..8f6d3ddb60e41 100644
--- a/llvm/test/CodeGen/X86/divide-by-constant.ll
+++ b/llvm/test/CodeGen/X86/divide-by-constant.ll
@@ -735,23 +735,13 @@ entry:
define i64 @urem_i64_12(i64 %x) nounwind {
; X32-LABEL: urem_i64_12:
; X32: # %bb.0: # %entry
-; X32-NEXT: pushl %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: shrdl $2, %ecx, %esi
-; X32-NEXT: shrl $2, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edx
-; X32-NEXT: shrl %edx
-; X32-NEXT: leal (%edx,%edx,2), %eax
-; X32-NEXT: subl %eax, %ecx
-; X32-NEXT: andl $3, %esi
-; X32-NEXT: leal (%esi,%ecx,4), %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: popl %esi
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $12
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll __umoddi3
+; X32-NEXT: addl $28, %esp
; X32-NEXT: retl
;
; X64-LABEL: urem_i64_12:
@@ -1126,33 +1116,13 @@ entry:
define i64 @udiv_i64_12(i64 %x) nounwind {
; X32-LABEL: udiv_i64_12:
; X32: # %bb.0: # %entry
-; X32-NEXT: pushl %ebx
-; X32-NEXT: pushl %edi
-; X32-NEXT: pushl %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: shrdl $2, %edi, %ecx
-; X32-NEXT: shrl $2, %edi
-; X32-NEXT: movl %ecx, %esi
-; X32-NEXT: addl %edi, %esi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl $-1431655765, %ebx # imm = 0xAAAAAAAB
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: shrl %edx
-; X32-NEXT: leal (%edx,%edx,2), %eax
-; X32-NEXT: subl %eax, %esi
-; X32-NEXT: subl %esi, %ecx
-; X32-NEXT: sbbl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: imull $-1431655766, %ecx, %ecx # imm = 0xAAAAAAAA
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull $-1431655765, %edi, %ecx # imm = 0xAAAAAAAB
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %edi
-; X32-NEXT: popl %ebx
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $12
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll __udivdi3
+; X32-NEXT: addl $28, %esp
; X32-NEXT: retl
;
; X64-LABEL: udiv_i64_12:
diff --git a/llvm/test/CodeGen/X86/divmod128.ll b/llvm/test/CodeGen/X86/divmod128.ll
index b0657f4f8456d..4549598ca3ed9 100644
--- a/llvm/test/CodeGen/X86/divmod128.ll
+++ b/llvm/test/CodeGen/X86/divmod128.ll
@@ -425,37 +425,27 @@ entry:
define i128 @urem_i128_12(i128 %x) nounwind {
; X86-64-LABEL: urem_i128_12:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: shrdq $2, %rsi, %rdi
-; X86-64-NEXT: shrq $2, %rsi
-; X86-64-NEXT: addq %rdi, %rsi
-; X86-64-NEXT: adcq $0, %rsi
-; X86-64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
-; X86-64-NEXT: movq %rsi, %rax
-; X86-64-NEXT: mulq %rcx
-; X86-64-NEXT: shrq %rdx
-; X86-64-NEXT: leaq (%rdx,%rdx,2), %rax
-; X86-64-NEXT: subq %rax, %rsi
-; X86-64-NEXT: andl $3, %edi
-; X86-64-NEXT: leaq (%rdi,%rsi,4), %rax
-; X86-64-NEXT: xorl %edx, %edx
+; X86-64-NEXT: pushq %rax
+; X86-64-NEXT: movl $12, %edx
+; X86-64-NEXT: xorl %ecx, %ecx
+; X86-64-NEXT: callq __umodti3@PLT
+; X86-64-NEXT: popq %rcx
; X86-64-NEXT: retq
;
; WIN64-LABEL: urem_i128_12:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: movq %rdx, %r8
-; WIN64-NEXT: shrdq $2, %rdx, %rcx
-; WIN64-NEXT: shrq $2, %r8
-; WIN64-NEXT: addq %rcx, %r8
-; WIN64-NEXT: adcq $0, %r8
-; WIN64-NEXT: movabsq $-6148914691236517205, %rdx # imm = 0xAAAAAAAAAAAAAAAB
-; WIN64-NEXT: movq %r8, %rax
-; WIN64-NEXT: mulq %rdx
-; WIN64-NEXT: shrq %rdx
-; WIN64-NEXT: leaq (%rdx,%rdx,2), %rax
-; WIN64-NEXT: subq %rax, %r8
-; WIN64-NEXT: andl $3, %ecx
-; WIN64-NEXT: leaq (%rcx,%r8,4), %rax
-; WIN64-NEXT: xorl %edx, %edx
+; WIN64-NEXT: subq $72, %rsp
+; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movq $12, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-NEXT: callq __umodti3
+; WIN64-NEXT: movq %xmm0, %rax
+; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; WIN64-NEXT: movq %xmm0, %rdx
+; WIN64-NEXT: addq $72, %rsp
; WIN64-NEXT: retq
entry:
%rem = urem i128 %x, 12
@@ -897,51 +887,27 @@ entry:
define i128 @udiv_i128_12(i128 %x) nounwind {
; X86-64-LABEL: udiv_i128_12:
; X86-64: # %bb.0: # %entry
-; X86-64-NEXT: shrdq $2, %rsi, %rdi
-; X86-64-NEXT: shrq $2, %rsi
-; X86-64-NEXT: movq %rdi, %rcx
-; X86-64-NEXT: addq %rsi, %rcx
-; X86-64-NEXT: adcq $0, %rcx
-; X86-64-NEXT: movabsq $-6148914691236517205, %r8 # imm = 0xAAAAAAAAAAAAAAAB
-; X86-64-NEXT: movq %rcx, %rax
-; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: shrq %rdx
-; X86-64-NEXT: leaq (%rdx,%rdx,2), %rax
-; X86-64-NEXT: subq %rax, %rcx
-; X86-64-NEXT: subq %rcx, %rdi
-; X86-64-NEXT: sbbq $0, %rsi
-; X86-64-NEXT: movabsq $-6148914691236517206, %rcx # imm = 0xAAAAAAAAAAAAAAAA
-; X86-64-NEXT: imulq %rdi, %rcx
-; X86-64-NEXT: movq %rdi, %rax
-; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: addq %rcx, %rdx
-; X86-64-NEXT: imulq %rsi, %r8
-; X86-64-NEXT: addq %r8, %rdx
+; X86-64-NEXT: pushq %rax
+; X86-64-NEXT: movl $12, %edx
+; X86-64-NEXT: xorl %ecx, %ecx
+; X86-64-NEXT: callq __udivti3@PLT
+; X86-64-NEXT: popq %rcx
; X86-64-NEXT: retq
;
; WIN64-LABEL: udiv_i128_12:
; WIN64: # %bb.0: # %entry
-; WIN64-NEXT: movq %rdx, %r8
-; WIN64-NEXT: shrdq $2, %rdx, %rcx
-; WIN64-NEXT: shrq $2, %r8
-; WIN64-NEXT: movq %rcx, %r9
-; WIN64-NEXT: addq %r8, %r9
-; WIN64-NEXT: adcq $0, %r9
-; WIN64-NEXT: movabsq $-6148914691236517205, %r10 # imm = 0xAAAAAAAAAAAAAAAB
-; WIN64-NEXT: movq %r9, %rax
-; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: shrq %rdx
-; WIN64-NEXT: leaq (%rdx,%rdx,2), %rax
-; WIN64-NEXT: subq %rax, %r9
-; WIN64-NEXT: subq %r9, %rcx
-; WIN64-NEXT: sbbq $0, %r8
-; WIN64-NEXT: movabsq $-6148914691236517206, %r9 # imm = 0xAAAAAAAAAAAAAAAA
-; WIN64-NEXT: imulq %rcx, %r9
-; WIN64-NEXT: movq %rcx, %rax
-; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: addq %r9, %rdx
-; WIN64-NEXT: imulq %r10, %r8
-; WIN64-NEXT: addq %r8, %rdx
+; WIN64-NEXT: subq $72, %rsp
+; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movq $12, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-NEXT: callq __udivti3
+; WIN64-NEXT: movq %xmm0, %rax
+; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; WIN64-NEXT: movq %xmm0, %rdx
+; WIN64-NEXT: addq $72, %rsp
; WIN64-NEXT: retq
entry:
%rem = udiv i128 %x, 12