[llvm] [SelectionDAG][X86] Fold `sub(x, mul(divrem(x,y)[0], y))` to `divrem(x, y)[1]` (PR #136565)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 21 06:24:52 PDT 2025
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/136565
>From b219a178569826b2b951e2dc2697f03a3edb4d85 Mon Sep 17 00:00:00 2001
From: Iris Shi <0.0 at owo.li>
Date: Mon, 21 Apr 2025 21:13:48 +0800
Subject: [PATCH 1/2] pre-commit tests
---
.../X86/div-rem-pair-recomposition-signed.ll | 39 +++++++++++++++++++
.../div-rem-pair-recomposition-unsigned.ll | 24 ++++++++++++
2 files changed, 63 insertions(+)
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
index 880062bbc4f9e..fc624aef0fcdd 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -1337,6 +1337,45 @@ end:
ret i32 %div
}
+define i32 @scalar_i32_const_pow2_divisor(i32 %x, ptr %rem) {
+; X86-LABEL: scalar_i32_const_pow2_divisor:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: leal 255(%edx), %eax
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: cmovnsl %edx, %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: andl $-256, %esi
+; X86-NEXT: subl %esi, %edx
+; X86-NEXT: movl %edx, (%ecx)
+; X86-NEXT: sarl $8, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: scalar_i32_const_pow2_divisor:
+; X64: # %bb.0: # %entry
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal 255(%rdi), %eax
+; X64-NEXT: testl %edi, %edi
+; X64-NEXT: cmovnsl %edi, %eax
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: andl $-256, %ecx
+; X64-NEXT: subl %ecx, %edi
+; X64-NEXT: movl %edi, (%rsi)
+; X64-NEXT: sarl $8, %eax
+; X64-NEXT: retq
+entry:
+ %rem1 = srem i32 %x, 256
+ store i32 %rem1, ptr %rem, align 4
+ %div = sdiv i32 %x, 256
+ ret i32 %div
+}
+
define i32 @negative_different_x(i32 %x0, i32 %x1, i32 %y, ptr %divdst) nounwind {
; X86-LABEL: negative_different_x:
; X86: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 6fdde0b14a984..54d7f49cd4323 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -1291,6 +1291,30 @@ end:
ret i32 %div
}
+define i32 @scalar_i32_const_pow2_divisor(i32 %x, ptr %rem) {
+; X86-LABEL: scalar_i32_const_pow2_divisor:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl %al, %edx
+; X86-NEXT: movl %edx, (%ecx)
+; X86-NEXT: shrl $8, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: scalar_i32_const_pow2_divisor:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movzbl %al, %ecx
+; X64-NEXT: movl %ecx, (%rsi)
+; X64-NEXT: shrl $8, %eax
+; X64-NEXT: retq
+entry:
+ %rem1 = urem i32 %x, 256
+ store i32 %rem1, ptr %rem, align 4
+ %div = udiv i32 %x, 256
+ ret i32 %div
+}
+
define i32 @negative_different_x(i32 %x0, i32 %x1, i32 %y, ptr %divdst) nounwind {
; X86-LABEL: negative_different_x:
; X86: # %bb.0:
>From dd970662e60e985ab462dc686aa1e890cadcd517 Mon Sep 17 00:00:00 2001
From: Iris Shi <0.0 at owo.li>
Date: Mon, 21 Apr 2025 21:18:08 +0800
Subject: [PATCH 2/2] [SelectionDAG][X86] Fold `sub(x, mul(divrem(x,y)[0], y))`
to `divrem(x, y)[1]`
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 57 +++++++++
.../X86/div-rem-pair-recomposition-signed.ll | 110 ++++++------------
.../div-rem-pair-recomposition-unsigned.ll | 110 ++++++------------
3 files changed, 123 insertions(+), 154 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b175e35385ec6..a69e2c9ee6c0c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3867,6 +3867,60 @@ static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
}
+// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
+static SDValue foldSubOfQuotientToRem(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
+ SDValue Sub0 = N->getOperand(0);
+ SDValue Sub1 = N->getOperand(1);
+ SDLoc DL(N);
+
+ auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
+ if ((DivRem.getOpcode() == ISD::SDIVREM ||
+ DivRem.getOpcode() == ISD::UDIVREM) &&
+ DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
+ DivRem.getOperand(1) == MaybeY) {
+ return SDValue(DivRem.getNode(), 1);
+ }
+ return SDValue();
+ };
+
+ if (Sub1.getOpcode() == ISD::MUL) {
+ // (sub x, (mul divrem(x,y)[0], y))
+ SDValue Mul0 = Sub1.getOperand(0);
+ SDValue Mul1 = Sub1.getOperand(1);
+
+ SDValue Res = CheckAndFoldMulCase(Mul0, Mul1);
+ if (Res.getNode())
+ return Res;
+
+ Res = CheckAndFoldMulCase(Mul1, Mul0);
+ if (Res.getNode())
+ return Res;
+
+ } else if (Sub1.getOpcode() == ISD::SHL) {
+ // Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
+ SDValue Shl0 = Sub1.getOperand(0);
+ SDValue Shl1 = Sub1.getOperand(1);
+ // Check if Shl0 is divrem(x, y)[0]
+ if ((Shl0.getOpcode() == ISD::SDIVREM ||
+ Shl0.getOpcode() == ISD::UDIVREM) &&
+ Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {
+
+ SDValue Divisor = Shl0.getOperand(1);
+
+ // Check if Divisor is a constant power of 2
+ auto *C = dyn_cast<ConstantSDNode>(Divisor);
+ if (C && C->getAPIntValue().isPowerOf2() && !C->isZero()) {
+ auto *Shamt = dyn_cast<ConstantSDNode>(Shl1);
+ if (Shamt && Shamt->getAPIntValue() == C->getAPIntValue().logBase2()) {
+ return SDValue(Shl0.getNode(), 1);
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
@@ -4094,6 +4148,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue V = foldSubToUSubSat(VT, N, DL))
return V;
+ if (SDValue V = foldSubOfQuotientToRem(N, DAG))
+ return V;
+
// (A - B) - 1 -> add (xor B, -1), A
if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
index fc624aef0fcdd..f21d172dac29b 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -11,25 +11,20 @@
define i8 @scalar_i8(i8 %x, i8 %y, ptr %divdst) nounwind {
; X86-LABEL: scalar_i8:
; X86: # %bb.0:
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: idivb {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movsbl %cl, %eax
-; X86-NEXT: idivb %ch
+; X86-NEXT: movsbl %ah, %ecx
; X86-NEXT: movb %al, (%edx)
-; X86-NEXT: mulb %ch
-; X86-NEXT: subb %al, %cl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8:
; X64: # %bb.0:
-; X64-NEXT: movsbl %dil, %ecx
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: idivb %sil
+; X64-NEXT: movsbl %ah, %ecx
; X64-NEXT: movb %al, (%rdx)
-; X64-NEXT: mulb %sil
-; X64-NEXT: subb %al, %cl
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
%div = sdiv i8 %x, %y
@@ -42,34 +37,23 @@ define i8 @scalar_i8(i8 %x, i8 %y, ptr %divdst) nounwind {
define i16 @scalar_i16(i16 %x, i16 %y, ptr %divdst) nounwind {
; X86-LABEL: scalar_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cwtd
-; X86-NEXT: idivw %si
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: movw %ax, (%edi)
-; X86-NEXT: imull %eax, %esi
-; X86-NEXT: subl %esi, %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: idivw {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movw %ax, (%ecx)
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: scalar_i16:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movl %edi, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: cwtd
; X64-NEXT: idivw %si
-; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: movw %ax, (%rcx)
-; X64-NEXT: imull %eax, %esi
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %edx, %eax
; X64-NEXT: retq
%div = sdiv i16 %x, %y
store i16 %div, ptr %divdst, align 4
@@ -81,20 +65,12 @@ define i16 @scalar_i16(i16 %x, i16 %y, ptr %divdst) nounwind {
define i32 @scalar_i32(i32 %x, i32 %y, ptr %divdst) nounwind {
; X86-LABEL: scalar_i32:
; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cltd
-; X86-NEXT: idivl %edi
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: imull %edi, %eax
-; X86-NEXT: subl %eax, %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: idivl {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %eax, (%ecx)
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: scalar_i32:
@@ -104,9 +80,7 @@ define i32 @scalar_i32(i32 %x, i32 %y, ptr %divdst) nounwind {
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: movl %eax, (%rcx)
-; X64-NEXT: imull %esi, %eax
-; X64-NEXT: subl %eax, %edi
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %edx, %eax
; X64-NEXT: retq
%div = sdiv i32 %x, %y
store i32 %div, ptr %divdst, align 4
@@ -158,9 +132,7 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind {
; X64-NEXT: cqto
; X64-NEXT: idivq %rsi
; X64-NEXT: movq %rax, (%rcx)
-; X64-NEXT: imulq %rsi, %rax
-; X64-NEXT: subq %rax, %rdi
-; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rdx, %rax
; X64-NEXT: retq
%div = sdiv i64 %x, %y
store i64 %div, ptr %divdst, align 4
@@ -1199,34 +1171,23 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, ptr %divdst) nounw
define i32 @scalar_i32_commutative(i32 %x, ptr %ysrc, ptr %divdst) nounwind {
; X86-LABEL: scalar_i32_commutative:
; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %edi
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cltd
-; X86-NEXT: idivl %edi
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: imull %eax, %edi
-; X86-NEXT: subl %edi, %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: idivl (%ecx)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %eax, (%ecx)
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: scalar_i32_commutative:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movl (%rsi), %esi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: cltd
-; X64-NEXT: idivl %esi
+; X64-NEXT: idivl (%rsi)
; X64-NEXT: movl %eax, (%rcx)
-; X64-NEXT: imull %eax, %esi
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %edx, %eax
; X64-NEXT: retq
%y = load i32, ptr %ysrc, align 4
%div = sdiv i32 %x, %y
@@ -1240,24 +1201,20 @@ define i32 @scalar_i32_commutative(i32 %x, ptr %ysrc, ptr %divdst) nounwind {
define i32 @extrause(i32 %x, i32 %y, ptr %divdst, ptr %t1dst) nounwind {
; X86-LABEL: extrause:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ecx, %eax
; X86-NEXT: cltd
-; X86-NEXT: idivl %ebx
+; X86-NEXT: idivl %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %eax, (%edi)
-; X86-NEXT: imull %ebx, %eax
+; X86-NEXT: imull %ecx, %eax
; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: subl %eax, %ecx
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: extrause:
@@ -1269,8 +1226,7 @@ define i32 @extrause(i32 %x, i32 %y, ptr %divdst, ptr %t1dst) nounwind {
; X64-NEXT: movl %eax, (%r8)
; X64-NEXT: imull %esi, %eax
; X64-NEXT: movl %eax, (%rcx)
-; X64-NEXT: subl %eax, %edi
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %edx, %eax
; X64-NEXT: retq
%div = sdiv i32 %x, %y
store i32 %div, ptr %divdst, align 4
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 54d7f49cd4323..0bc141bf6bb90 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -11,25 +11,20 @@
define i8 @scalar_i8(i8 %x, i8 %y, ptr %divdst) nounwind {
; X86-LABEL: scalar_i8:
; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: divb {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: divb %ch
+; X86-NEXT: movzbl %ah, %ecx
; X86-NEXT: movb %al, (%edx)
-; X86-NEXT: mulb %ch
-; X86-NEXT: subb %al, %cl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8:
; X64: # %bb.0:
-; X64-NEXT: movzbl %dil, %ecx
-; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: divb %sil
+; X64-NEXT: movzbl %ah, %ecx
; X64-NEXT: movb %al, (%rdx)
-; X64-NEXT: mulb %sil
-; X64-NEXT: subb %al, %cl
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
%div = udiv i8 %x, %y
@@ -42,34 +37,23 @@ define i8 @scalar_i8(i8 %x, i8 %y, ptr %divdst) nounwind {
define i16 @scalar_i16(i16 %x, i16 %y, ptr %divdst) nounwind {
; X86-LABEL: scalar_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divw %si
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: movw %ax, (%edi)
-; X86-NEXT: imull %eax, %esi
-; X86-NEXT: subl %esi, %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: divw {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movw %ax, (%ecx)
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: scalar_i16:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movl %edi, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divw %si
-; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: movw %ax, (%rcx)
-; X64-NEXT: imull %eax, %esi
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %edx, %eax
; X64-NEXT: retq
%div = udiv i16 %x, %y
store i16 %div, ptr %divdst, align 4
@@ -81,20 +65,12 @@ define i16 @scalar_i16(i16 %x, i16 %y, ptr %divdst) nounwind {
define i32 @scalar_i32(i32 %x, i32 %y, ptr %divdst) nounwind {
; X86-LABEL: scalar_i32:
; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divl %edi
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: imull %edi, %eax
-; X86-NEXT: subl %eax, %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: divl {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %eax, (%ecx)
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: scalar_i32:
@@ -104,9 +80,7 @@ define i32 @scalar_i32(i32 %x, i32 %y, ptr %divdst) nounwind {
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: movl %eax, (%rcx)
-; X64-NEXT: imull %esi, %eax
-; X64-NEXT: subl %eax, %edi
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %edx, %eax
; X64-NEXT: retq
%div = udiv i32 %x, %y
store i32 %div, ptr %divdst, align 4
@@ -158,9 +132,7 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind {
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rsi
; X64-NEXT: movq %rax, (%rcx)
-; X64-NEXT: imulq %rsi, %rax
-; X64-NEXT: subq %rax, %rdi
-; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rdx, %rax
; X64-NEXT: retq
%div = udiv i64 %x, %y
store i64 %div, ptr %divdst, align 4
@@ -1153,34 +1125,23 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, ptr %divdst) nounw
define i32 @scalar_i32_commutative(i32 %x, ptr %ysrc, ptr %divdst) nounwind {
; X86-LABEL: scalar_i32_commutative:
; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %edi
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divl %edi
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: imull %eax, %edi
-; X86-NEXT: subl %edi, %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: divl (%ecx)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %eax, (%ecx)
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: scalar_i32_commutative:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movl (%rsi), %esi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divl %esi
+; X64-NEXT: divl (%rsi)
; X64-NEXT: movl %eax, (%rcx)
-; X64-NEXT: imull %eax, %esi
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %edx, %eax
; X64-NEXT: retq
%y = load i32, ptr %ysrc, align 4
%div = udiv i32 %x, %y
@@ -1194,24 +1155,20 @@ define i32 @scalar_i32_commutative(i32 %x, ptr %ysrc, ptr %divdst) nounwind {
define i32 @extrause(i32 %x, i32 %y, ptr %divdst, ptr %t1dst) nounwind {
; X86-LABEL: extrause:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ecx, %eax
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divl %ebx
+; X86-NEXT: divl %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %eax, (%edi)
-; X86-NEXT: imull %ebx, %eax
+; X86-NEXT: imull %ecx, %eax
; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: subl %eax, %ecx
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: extrause:
@@ -1223,8 +1180,7 @@ define i32 @extrause(i32 %x, i32 %y, ptr %divdst, ptr %t1dst) nounwind {
; X64-NEXT: movl %eax, (%r8)
; X64-NEXT: imull %esi, %eax
; X64-NEXT: movl %eax, (%rcx)
-; X64-NEXT: subl %eax, %edi
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %edx, %eax
; X64-NEXT: retq
%div = udiv i32 %x, %y
store i32 %div, ptr %divdst, align 4
More information about the llvm-commits
mailing list