[llvm] [DAGCombiner] Fix exact power-of-two signed division for large integers (PR #177340)
Steffen Larsen via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 26 01:04:49 PST 2026
https://github.com/steffenlarsen updated https://github.com/llvm/llvm-project/pull/177340
>From ad8214dc3344a398051c8919d6ee4512777da7c2 Mon Sep 17 00:00:00 2001
From: Steffen Holst Larsen <HolstLarsen.Steffen at amd.com>
Date: Thu, 22 Jan 2026 04:27:41 -0600
Subject: [PATCH 1/2] [DAGCombiner] Fix exact power-of-two signed division for
large integers
Previously, the DAG combiner did not optimize exact signed division by a
power-of-two constant divisor for integer types wider than the largest
division the target supports (e.g., i128 on AMDGPU or i129 on x86-64).
However, the division expansion logic skips such divisions on the
assumption that the combiner will fold them, so unsupported division
operations reached instruction selection.
This commit fixes the issue by adding an exception to the existing rule
that excludes signed divisions carrying the exact flag from this fold.
That is, the DAG combiner now optimizes an exact signed division when
the divisor is a power-of-two constant and the integer type is wider
than the largest division the target supports.
Signed-off-by: Steffen Holst Larsen <HolstLarsen.Steffen at amd.com>
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 +-
llvm/test/CodeGen/AMDGPU/div_i128.ll | 182 +++++++
llvm/test/CodeGen/X86/div_i129_v_pow2k.ll | 481 ++++++++++++++++++
3 files changed, 669 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/div_i129_v_pow2k.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6cf3699a15a44..cbe25eb76e5c3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5232,12 +5232,16 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
unsigned BitWidth = VT.getScalarSizeInBits();
+ unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
// fold (sdiv X, pow2) -> simple ops after legalize
// FIXME: We check for the exact bit here because the generic lowering gives
// better results in that case. The target-specific lowering should learn how
- // to handle exact sdivs efficiently.
- if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
+ // to handle exact sdivs efficiently. An exception is made for large bitwidths
+ // exceeding what the target can natively support, as division expansion was
+ // skipped in favor of this optimization.
+ if ((!N->getFlags().hasExact() || BitWidth > MaxLegalDivRemBitWidth) &&
+ isDivisorPowerOfTwo(N1)) {
// Target-specific implementation of sdiv x, pow2.
if (SDValue Res = BuildSDIVPow2(N))
return Res;
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index d5b5ab6e457f9..5a4aa4effac00 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -4373,6 +4373,115 @@ define i128 @v_sdiv_i128_v_pow2k(i128 %lhs) {
ret i128 %div
}
+define i128 @v_sdiv_exact_i128_v_pow2k(i128 %lhs) {
+; GFX9-LABEL: v_sdiv_exact_i128_v_pow2k:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v3
+; GFX9-NEXT: v_mov_b32_e32 v5, v4
+; GFX9-NEXT: v_lshrrev_b64 v[4:5], 31, v[4:5]
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4
+; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v5, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[2:3]
+; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v4
+; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX9-NEXT: v_ashrrev_i32_e32 v2, 1, v3
+; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v3
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-O0-LABEL: v_sdiv_exact_i128_v_pow2k:
+; GFX9-O0: ; %bb.0:
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
+; GFX9-O0-NEXT: s_mov_b32 s4, 63
+; GFX9-O0-NEXT: v_ashrrev_i64 v[4:5], s4, v[4:5]
+; GFX9-O0-NEXT: s_mov_b32 s5, 31
+; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], s5, v[4:5]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-O0-NEXT: s_mov_b64 s[8:9], 0
+; GFX9-O0-NEXT: s_mov_b32 s6, s8
+; GFX9-O0-NEXT: s_mov_b32 s4, s9
+; GFX9-O0-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s6
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v5, vcc, v2, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
+; GFX9-O0-NEXT: s_mov_b32 s4, 33
+; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_lshl_or_b32 v0, v2, s5, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v6
+; GFX9-O0-NEXT: v_ashrrev_i64 v[3:4], s4, v[3:4]
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
+; GFX9-O0-NEXT: s_mov_b32 s4, 1
+; GFX9-O0-NEXT: v_alignbit_b32 v1, v1, v2, s4
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
+; GFX9-O0-NEXT: s_mov_b32 s4, 32
+; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4]
+; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
+; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-G-LABEL: v_sdiv_exact_i128_v_pow2k:
+; GFX9-G: ; %bb.0:
+; GFX9-G-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-G-NEXT: v_mov_b32_e32 v4, v1
+; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], 31, v[2:3]
+; GFX9-G-NEXT: v_lshrrev_b32_e32 v2, 1, v4
+; GFX9-G-NEXT: v_ashrrev_i32_e32 v4, 31, v3
+; GFX9-G-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX9-G-NEXT: v_ashrrev_i32_e32 v2, 1, v3
+; GFX9-G-NEXT: v_mov_b32_e32 v3, v4
+; GFX9-G-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-G-O0-LABEL: v_sdiv_exact_i128_v_pow2k:
+; GFX9-G-O0: ; %bb.0:
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v3
+; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v4
+; GFX9-G-O0-NEXT: s_mov_b32 s4, 1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v2, v0, v1
+; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[5:6], v0, v[5:6]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v6
+; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v2
+; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v3, v2, v4
+; GFX9-G-O0-NEXT: s_mov_b32 s4, 1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v2, v2, v4
+; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31]
+ %div = sdiv exact i128 %lhs, 8589934592
+ ret i128 %div
+}
+
define i128 @v_udiv_i128_v_pow2k(i128 %lhs) {
; GFX9-LABEL: v_udiv_i128_v_pow2k:
; GFX9: ; %bb.0:
@@ -4445,3 +4554,76 @@ define i128 @v_udiv_i128_v_pow2k(i128 %lhs) {
%div = udiv i128 %lhs, 8589934592
ret i128 %div
}
+
+define i128 @v_udiv_exact_i128_v_pow2k(i128 %lhs) {
+; GFX9-LABEL: v_udiv_exact_i128_v_pow2k:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v4, v1
+; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[2:3]
+; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v4
+; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v3
+; GFX9-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-O0-LABEL: v_udiv_exact_i128_v_pow2k:
+; GFX9-O0: ; %bb.0:
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5
+; GFX9-O0-NEXT: s_mov_b32 s4, 33
+; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: s_mov_b32 s5, 31
+; GFX9-O0-NEXT: v_lshl_or_b32 v0, v4, s5, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[1:2]
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
+; GFX9-O0-NEXT: s_mov_b32 s4, 1
+; GFX9-O0-NEXT: v_alignbit_b32 v1, v1, v4, s4
+; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-G-LABEL: v_udiv_exact_i128_v_pow2k:
+; GFX9-G: ; %bb.0:
+; GFX9-G-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-G-NEXT: v_mov_b32_e32 v4, v1
+; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], 31, v[2:3]
+; GFX9-G-NEXT: v_lshrrev_b32_e32 v2, 1, v4
+; GFX9-G-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX9-G-NEXT: v_lshrrev_b32_e32 v2, 1, v3
+; GFX9-G-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-G-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-G-O0-LABEL: v_udiv_exact_i128_v_pow2k:
+; GFX9-G-O0: ; %bb.0:
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v2
+; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-G-O0-NEXT: s_mov_b32 s4, 1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v2, v0, v1
+; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[4:5], v0, v[4:5]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v5
+; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v2
+; GFX9-G-O0-NEXT: s_mov_b32 s4, 1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v2, v2, v3
+; GFX9-G-O0-NEXT: s_mov_b32 s4, 0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s4
+; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31]
+ %div = udiv exact i128 %lhs, 8589934592
+ ret i128 %div
+}
diff --git a/llvm/test/CodeGen/X86/div_i129_v_pow2k.ll b/llvm/test/CodeGen/X86/div_i129_v_pow2k.ll
new file mode 100644
index 0000000000000..5ffb4df7c88e2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/div_i129_v_pow2k.ll
@@ -0,0 +1,481 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-- -O0 | FileCheck %s --check-prefix=X64-O0
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=i686-- -O0 | FileCheck %s --check-prefix=X32-O0
+
+define i129 @v_sdiv_i129_v_pow2k(i129 %lhs) {
+; X64-LABEL: v_sdiv_i129_v_pow2k:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: andl $1, %edx
+; X64-NEXT: negq %rdx
+; X64-NEXT: movl %edx, %eax
+; X64-NEXT: andl $1, %eax
+; X64-NEXT: shldq $32, %rdx, %rax
+; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: shrdq $33, %rsi, %rax
+; X64-NEXT: andl $1, %ecx
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: negq %rdx
+; X64-NEXT: shldq $31, %rsi, %rdx
+; X64-NEXT: retq
+;
+; X64-O0-LABEL: v_sdiv_i129_v_pow2k:
+; X64-O0: # %bb.0:
+; X64-O0-NEXT: movl %edx, %eax
+; X64-O0-NEXT: andl $1, %eax
+; X64-O0-NEXT: movl %eax, %ecx
+; X64-O0-NEXT: negq %rcx
+; X64-O0-NEXT: movl %ecx, %r8d
+; X64-O0-NEXT: andl $1, %r8d
+; X64-O0-NEXT: # implicit-def: $rax
+; X64-O0-NEXT: movl %r8d, %eax
+; X64-O0-NEXT: shldq $32, %rcx, %rax
+; X64-O0-NEXT: addq %rax, %rdi
+; X64-O0-NEXT: adcq $0, %rsi
+; X64-O0-NEXT: adcq $0, %rdx
+; X64-O0-NEXT: movq %rsi, %rax
+; X64-O0-NEXT: shldq $31, %rdi, %rax
+; X64-O0-NEXT: movl %edx, %ecx
+; X64-O0-NEXT: andl $1, %ecx
+; X64-O0-NEXT: # kill: def $rcx killed $ecx
+; X64-O0-NEXT: movq %rcx, %rdx
+; X64-O0-NEXT: negq %rdx
+; X64-O0-NEXT: shldq $31, %rsi, %rdx
+; X64-O0-NEXT: retq
+;
+; X32-LABEL: v_sdiv_i129_v_pow2k:
+; X32: # %bb.0:
+; X32-NEXT: pushl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: pushl %edi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %edi, -12
+; X32-NEXT: .cfi_offset %ebx, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl %ecx, %ebx
+; X32-NEXT: andl $1, %ebx
+; X32-NEXT: negl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl %ebx, %edi
+; X32-NEXT: andl $1, %edi
+; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %ecx, %ebx
+; X32-NEXT: shldl $31, %edx, %ebx
+; X32-NEXT: shldl $31, %esi, %edx
+; X32-NEXT: shldl $31, %edi, %esi
+; X32-NEXT: andl $1, %ecx
+; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: negl %edi
+; X32-NEXT: movl %esi, (%eax)
+; X32-NEXT: movl %edx, 4(%eax)
+; X32-NEXT: movl %ebx, 8(%eax)
+; X32-NEXT: movl %edi, 12(%eax)
+; X32-NEXT: movb %cl, 16(%eax)
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: popl %edi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl $4
+;
+; X32-O0-LABEL: v_sdiv_i129_v_pow2k:
+; X32-O0: # %bb.0:
+; X32-O0-NEXT: pushl %ebp
+; X32-O0-NEXT: .cfi_def_cfa_offset 8
+; X32-O0-NEXT: pushl %ebx
+; X32-O0-NEXT: .cfi_def_cfa_offset 12
+; X32-O0-NEXT: pushl %edi
+; X32-O0-NEXT: .cfi_def_cfa_offset 16
+; X32-O0-NEXT: pushl %esi
+; X32-O0-NEXT: .cfi_def_cfa_offset 20
+; X32-O0-NEXT: subl $8, %esp
+; X32-O0-NEXT: .cfi_def_cfa_offset 28
+; X32-O0-NEXT: .cfi_offset %esi, -20
+; X32-O0-NEXT: .cfi_offset %edi, -16
+; X32-O0-NEXT: .cfi_offset %ebx, -12
+; X32-O0-NEXT: .cfi_offset %ebp, -8
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-O0-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-O0-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-O0-NEXT: movl %edx, %ecx
+; X32-O0-NEXT: andl $1, %ecx
+; X32-O0-NEXT: negl %ecx
+; X32-O0-NEXT: movl %ecx, %edi
+; X32-O0-NEXT: andl $1, %edi
+; X32-O0-NEXT: addl %ecx, %eax
+; X32-O0-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-O0-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-O0-NEXT: adcl %edi, %esi
+; X32-O0-NEXT: adcl $0, %ebp
+; X32-O0-NEXT: adcl $0, %ebx
+; X32-O0-NEXT: adcl $0, %edx
+; X32-O0-NEXT: movl %edx, %edi
+; X32-O0-NEXT: shldl $31, %ebx, %edi
+; X32-O0-NEXT: shldl $31, %ebp, %ebx
+; X32-O0-NEXT: shldl $31, %esi, %ebp
+; X32-O0-NEXT: andl $1, %edx
+; X32-O0-NEXT: movl %edx, %esi
+; X32-O0-NEXT: negl %esi
+; X32-O0-NEXT: movl %ebp, (%ecx)
+; X32-O0-NEXT: movl %ebx, 4(%ecx)
+; X32-O0-NEXT: movl %edi, 8(%ecx)
+; X32-O0-NEXT: movl %esi, 12(%ecx)
+; X32-O0-NEXT: # kill: def $dl killed $dl killed $edx
+; X32-O0-NEXT: movb %dl, 16(%ecx)
+; X32-O0-NEXT: addl $8, %esp
+; X32-O0-NEXT: .cfi_def_cfa_offset 20
+; X32-O0-NEXT: popl %esi
+; X32-O0-NEXT: .cfi_def_cfa_offset 16
+; X32-O0-NEXT: popl %edi
+; X32-O0-NEXT: .cfi_def_cfa_offset 12
+; X32-O0-NEXT: popl %ebx
+; X32-O0-NEXT: .cfi_def_cfa_offset 8
+; X32-O0-NEXT: popl %ebp
+; X32-O0-NEXT: .cfi_def_cfa_offset 4
+; X32-O0-NEXT: retl $4
+ %div = sdiv i129 %lhs, 8589934592
+ ret i129 %div
+}
+
+define i129 @v_sdiv_exact_i129_v_pow2k(i129 %lhs) {
+; X64-LABEL: v_sdiv_exact_i129_v_pow2k:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: andl $1, %edx
+; X64-NEXT: negq %rdx
+; X64-NEXT: movl %edx, %eax
+; X64-NEXT: andl $1, %eax
+; X64-NEXT: shldq $32, %rdx, %rax
+; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: shrdq $33, %rsi, %rax
+; X64-NEXT: andl $1, %ecx
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: negq %rdx
+; X64-NEXT: shldq $31, %rsi, %rdx
+; X64-NEXT: retq
+;
+; X64-O0-LABEL: v_sdiv_exact_i129_v_pow2k:
+; X64-O0: # %bb.0:
+; X64-O0-NEXT: movl %edx, %eax
+; X64-O0-NEXT: andl $1, %eax
+; X64-O0-NEXT: movl %eax, %ecx
+; X64-O0-NEXT: negq %rcx
+; X64-O0-NEXT: movl %ecx, %r8d
+; X64-O0-NEXT: andl $1, %r8d
+; X64-O0-NEXT: # implicit-def: $rax
+; X64-O0-NEXT: movl %r8d, %eax
+; X64-O0-NEXT: shldq $32, %rcx, %rax
+; X64-O0-NEXT: addq %rax, %rdi
+; X64-O0-NEXT: adcq $0, %rsi
+; X64-O0-NEXT: adcq $0, %rdx
+; X64-O0-NEXT: movq %rsi, %rax
+; X64-O0-NEXT: shldq $31, %rdi, %rax
+; X64-O0-NEXT: movl %edx, %ecx
+; X64-O0-NEXT: andl $1, %ecx
+; X64-O0-NEXT: # kill: def $rcx killed $ecx
+; X64-O0-NEXT: movq %rcx, %rdx
+; X64-O0-NEXT: negq %rdx
+; X64-O0-NEXT: shldq $31, %rsi, %rdx
+; X64-O0-NEXT: retq
+;
+; X32-LABEL: v_sdiv_exact_i129_v_pow2k:
+; X32: # %bb.0:
+; X32-NEXT: pushl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: pushl %edi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %edi, -12
+; X32-NEXT: .cfi_offset %ebx, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl %ecx, %ebx
+; X32-NEXT: andl $1, %ebx
+; X32-NEXT: negl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl %ebx, %edi
+; X32-NEXT: andl $1, %edi
+; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %ecx, %ebx
+; X32-NEXT: shldl $31, %edx, %ebx
+; X32-NEXT: shldl $31, %esi, %edx
+; X32-NEXT: shldl $31, %edi, %esi
+; X32-NEXT: andl $1, %ecx
+; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: negl %edi
+; X32-NEXT: movl %esi, (%eax)
+; X32-NEXT: movl %edx, 4(%eax)
+; X32-NEXT: movl %ebx, 8(%eax)
+; X32-NEXT: movl %edi, 12(%eax)
+; X32-NEXT: movb %cl, 16(%eax)
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: popl %edi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl $4
+;
+; X32-O0-LABEL: v_sdiv_exact_i129_v_pow2k:
+; X32-O0: # %bb.0:
+; X32-O0-NEXT: pushl %ebp
+; X32-O0-NEXT: .cfi_def_cfa_offset 8
+; X32-O0-NEXT: pushl %ebx
+; X32-O0-NEXT: .cfi_def_cfa_offset 12
+; X32-O0-NEXT: pushl %edi
+; X32-O0-NEXT: .cfi_def_cfa_offset 16
+; X32-O0-NEXT: pushl %esi
+; X32-O0-NEXT: .cfi_def_cfa_offset 20
+; X32-O0-NEXT: subl $8, %esp
+; X32-O0-NEXT: .cfi_def_cfa_offset 28
+; X32-O0-NEXT: .cfi_offset %esi, -20
+; X32-O0-NEXT: .cfi_offset %edi, -16
+; X32-O0-NEXT: .cfi_offset %ebx, -12
+; X32-O0-NEXT: .cfi_offset %ebp, -8
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-O0-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-O0-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-O0-NEXT: movl %edx, %ecx
+; X32-O0-NEXT: andl $1, %ecx
+; X32-O0-NEXT: negl %ecx
+; X32-O0-NEXT: movl %ecx, %edi
+; X32-O0-NEXT: andl $1, %edi
+; X32-O0-NEXT: addl %ecx, %eax
+; X32-O0-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-O0-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-O0-NEXT: adcl %edi, %esi
+; X32-O0-NEXT: adcl $0, %ebp
+; X32-O0-NEXT: adcl $0, %ebx
+; X32-O0-NEXT: adcl $0, %edx
+; X32-O0-NEXT: movl %edx, %edi
+; X32-O0-NEXT: shldl $31, %ebx, %edi
+; X32-O0-NEXT: shldl $31, %ebp, %ebx
+; X32-O0-NEXT: shldl $31, %esi, %ebp
+; X32-O0-NEXT: andl $1, %edx
+; X32-O0-NEXT: movl %edx, %esi
+; X32-O0-NEXT: negl %esi
+; X32-O0-NEXT: movl %ebp, (%ecx)
+; X32-O0-NEXT: movl %ebx, 4(%ecx)
+; X32-O0-NEXT: movl %edi, 8(%ecx)
+; X32-O0-NEXT: movl %esi, 12(%ecx)
+; X32-O0-NEXT: # kill: def $dl killed $dl killed $edx
+; X32-O0-NEXT: movb %dl, 16(%ecx)
+; X32-O0-NEXT: addl $8, %esp
+; X32-O0-NEXT: .cfi_def_cfa_offset 20
+; X32-O0-NEXT: popl %esi
+; X32-O0-NEXT: .cfi_def_cfa_offset 16
+; X32-O0-NEXT: popl %edi
+; X32-O0-NEXT: .cfi_def_cfa_offset 12
+; X32-O0-NEXT: popl %ebx
+; X32-O0-NEXT: .cfi_def_cfa_offset 8
+; X32-O0-NEXT: popl %ebp
+; X32-O0-NEXT: .cfi_def_cfa_offset 4
+; X32-O0-NEXT: retl $4
+ %div = sdiv exact i129 %lhs, 8589934592
+ ret i129 %div
+}
+
+define i129 @v_udiv_i129_v_pow2k(i129 %lhs) {
+; X64-LABEL: v_udiv_i129_v_pow2k:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: andl $1, %edx
+; X64-NEXT: shrdq $33, %rsi, %rax
+; X64-NEXT: shldq $31, %rsi, %rdx
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: retq
+;
+; X64-O0-LABEL: v_udiv_i129_v_pow2k:
+; X64-O0: # %bb.0:
+; X64-O0-NEXT: movq %rsi, %rax
+; X64-O0-NEXT: shldq $31, %rdi, %rax
+; X64-O0-NEXT: movl %edx, %ecx
+; X64-O0-NEXT: andl $1, %ecx
+; X64-O0-NEXT: movl %ecx, %edx
+; X64-O0-NEXT: shldq $31, %rsi, %rdx
+; X64-O0-NEXT: xorl %ecx, %ecx
+; X64-O0-NEXT: # kill: def $rcx killed $ecx
+; X64-O0-NEXT: retq
+;
+; X32-LABEL: v_udiv_i129_v_pow2k:
+; X32: # %bb.0:
+; X32-NEXT: pushl %edi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: .cfi_offset %esi, -12
+; X32-NEXT: .cfi_offset %edi, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: shrdl $1, %esi, %edx
+; X32-NEXT: shldl $31, %edi, %ecx
+; X32-NEXT: shldl $31, %esi, %edi
+; X32-NEXT: movl %ecx, 8(%eax)
+; X32-NEXT: movl %edi, 4(%eax)
+; X32-NEXT: movl %edx, (%eax)
+; X32-NEXT: movl $0, 12(%eax)
+; X32-NEXT: movb $0, 16(%eax)
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %edi
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl $4
+;
+; X32-O0-LABEL: v_udiv_i129_v_pow2k:
+; X32-O0: # %bb.0:
+; X32-O0-NEXT: pushl %ebx
+; X32-O0-NEXT: .cfi_def_cfa_offset 8
+; X32-O0-NEXT: pushl %edi
+; X32-O0-NEXT: .cfi_def_cfa_offset 12
+; X32-O0-NEXT: pushl %esi
+; X32-O0-NEXT: .cfi_def_cfa_offset 16
+; X32-O0-NEXT: .cfi_offset %esi, -16
+; X32-O0-NEXT: .cfi_offset %edi, -12
+; X32-O0-NEXT: .cfi_offset %ebx, -8
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-O0-NEXT: movl %ecx, %eax
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-O0-NEXT: shldl $31, %esi, %edx
+; X32-O0-NEXT: shldl $31, %edi, %esi
+; X32-O0-NEXT: shldl $31, %ebx, %edi
+; X32-O0-NEXT: movl %edi, (%ecx)
+; X32-O0-NEXT: movl %esi, 4(%ecx)
+; X32-O0-NEXT: movl %edx, 8(%ecx)
+; X32-O0-NEXT: movl $0, 12(%ecx)
+; X32-O0-NEXT: movb $0, 16(%ecx)
+; X32-O0-NEXT: popl %esi
+; X32-O0-NEXT: .cfi_def_cfa_offset 12
+; X32-O0-NEXT: popl %edi
+; X32-O0-NEXT: .cfi_def_cfa_offset 8
+; X32-O0-NEXT: popl %ebx
+; X32-O0-NEXT: .cfi_def_cfa_offset 4
+; X32-O0-NEXT: retl $4
+ %div = udiv i129 %lhs, 8589934592
+ ret i129 %div
+}
+
+define i129 @v_udiv_exact_i129_v_pow2k(i129 %lhs) {
+; X64-LABEL: v_udiv_exact_i129_v_pow2k:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: andl $1, %edx
+; X64-NEXT: shrdq $33, %rsi, %rax
+; X64-NEXT: shldq $31, %rsi, %rdx
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: retq
+;
+; X64-O0-LABEL: v_udiv_exact_i129_v_pow2k:
+; X64-O0: # %bb.0:
+; X64-O0-NEXT: movq %rsi, %rax
+; X64-O0-NEXT: shldq $31, %rdi, %rax
+; X64-O0-NEXT: movl %edx, %ecx
+; X64-O0-NEXT: andl $1, %ecx
+; X64-O0-NEXT: movl %ecx, %edx
+; X64-O0-NEXT: shldq $31, %rsi, %rdx
+; X64-O0-NEXT: xorl %ecx, %ecx
+; X64-O0-NEXT: # kill: def $rcx killed $ecx
+; X64-O0-NEXT: retq
+;
+; X32-LABEL: v_udiv_exact_i129_v_pow2k:
+; X32: # %bb.0:
+; X32-NEXT: pushl %edi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: .cfi_offset %esi, -12
+; X32-NEXT: .cfi_offset %edi, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: shrdl $1, %esi, %edx
+; X32-NEXT: shldl $31, %edi, %ecx
+; X32-NEXT: shldl $31, %esi, %edi
+; X32-NEXT: movl %ecx, 8(%eax)
+; X32-NEXT: movl %edi, 4(%eax)
+; X32-NEXT: movl %edx, (%eax)
+; X32-NEXT: movl $0, 12(%eax)
+; X32-NEXT: movb $0, 16(%eax)
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %edi
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl $4
+;
+; X32-O0-LABEL: v_udiv_exact_i129_v_pow2k:
+; X32-O0: # %bb.0:
+; X32-O0-NEXT: pushl %ebx
+; X32-O0-NEXT: .cfi_def_cfa_offset 8
+; X32-O0-NEXT: pushl %edi
+; X32-O0-NEXT: .cfi_def_cfa_offset 12
+; X32-O0-NEXT: pushl %esi
+; X32-O0-NEXT: .cfi_def_cfa_offset 16
+; X32-O0-NEXT: .cfi_offset %esi, -16
+; X32-O0-NEXT: .cfi_offset %edi, -12
+; X32-O0-NEXT: .cfi_offset %ebx, -8
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-O0-NEXT: movl %ecx, %eax
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-O0-NEXT: shldl $31, %esi, %edx
+; X32-O0-NEXT: shldl $31, %edi, %esi
+; X32-O0-NEXT: shldl $31, %ebx, %edi
+; X32-O0-NEXT: movl %edi, (%ecx)
+; X32-O0-NEXT: movl %esi, 4(%ecx)
+; X32-O0-NEXT: movl %edx, 8(%ecx)
+; X32-O0-NEXT: movl $0, 12(%ecx)
+; X32-O0-NEXT: movb $0, 16(%ecx)
+; X32-O0-NEXT: popl %esi
+; X32-O0-NEXT: .cfi_def_cfa_offset 12
+; X32-O0-NEXT: popl %edi
+; X32-O0-NEXT: .cfi_def_cfa_offset 8
+; X32-O0-NEXT: popl %ebx
+; X32-O0-NEXT: .cfi_def_cfa_offset 4
+; X32-O0-NEXT: retl $4
+ %div = udiv exact i129 %lhs, 8589934592
+ ret i129 %div
+}
>From cf730c94f7d4572d37712c9f098d49a5650bee9f Mon Sep 17 00:00:00 2001
From: Steffen Holst Larsen <HolstLarsen.Steffen at amd.com>
Date: Mon, 26 Jan 2026 03:04:23 -0600
Subject: [PATCH 2/2] Address x86 test comments
Signed-off-by: Steffen Holst Larsen <HolstLarsen.Steffen at amd.com>
---
llvm/test/CodeGen/X86/div_i129_v_pow2k.ll | 588 ++++++++++------------
1 file changed, 256 insertions(+), 332 deletions(-)
diff --git a/llvm/test/CodeGen/X86/div_i129_v_pow2k.ll b/llvm/test/CodeGen/X86/div_i129_v_pow2k.ll
index 5ffb4df7c88e2..4d6d795e3beb8 100644
--- a/llvm/test/CodeGen/X86/div_i129_v_pow2k.ll
+++ b/llvm/test/CodeGen/X86/div_i129_v_pow2k.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-- -O0 | FileCheck %s --check-prefix=X64-O0
-; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=i686-- -O0 | FileCheck %s --check-prefix=X32-O0
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=i686-- -O0 | FileCheck %s --check-prefix=X86-O0
-define i129 @v_sdiv_i129_v_pow2k(i129 %lhs) {
+define i129 @v_sdiv_i129_v_pow2k(i129 %lhs) nounwind {
; X64-LABEL: v_sdiv_i129_v_pow2k:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
@@ -47,116 +47,93 @@ define i129 @v_sdiv_i129_v_pow2k(i129 %lhs) {
; X64-O0-NEXT: shldq $31, %rsi, %rdx
; X64-O0-NEXT: retq
;
-; X32-LABEL: v_sdiv_i129_v_pow2k:
-; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: pushl %edi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 16
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %edi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, %ebx
-; X32-NEXT: andl $1, %ebx
-; X32-NEXT: negl %ebx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl %ebx, %edi
-; X32-NEXT: andl $1, %edi
-; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl %ecx, %ebx
-; X32-NEXT: shldl $31, %edx, %ebx
-; X32-NEXT: shldl $31, %esi, %edx
-; X32-NEXT: shldl $31, %edi, %esi
-; X32-NEXT: andl $1, %ecx
-; X32-NEXT: movl %ecx, %edi
-; X32-NEXT: negl %edi
-; X32-NEXT: movl %esi, (%eax)
-; X32-NEXT: movl %edx, 4(%eax)
-; X32-NEXT: movl %ebx, 8(%eax)
-; X32-NEXT: movl %edi, 12(%eax)
-; X32-NEXT: movb %cl, 16(%eax)
-; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: popl %edi
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
-; X32-NEXT: retl $4
+; X86-LABEL: v_sdiv_i129_v_pow2k:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: andl $1, %ebx
+; X86-NEXT: negl %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: andl $1, %edi
+; X86-NEXT: addl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: shldl $31, %edx, %ebx
+; X86-NEXT: shldl $31, %esi, %edx
+; X86-NEXT: shldl $31, %edi, %esi
+; X86-NEXT: andl $1, %ecx
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: negl %edi
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movb %cl, 16(%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl $4
;
-; X32-O0-LABEL: v_sdiv_i129_v_pow2k:
-; X32-O0: # %bb.0:
-; X32-O0-NEXT: pushl %ebp
-; X32-O0-NEXT: .cfi_def_cfa_offset 8
-; X32-O0-NEXT: pushl %ebx
-; X32-O0-NEXT: .cfi_def_cfa_offset 12
-; X32-O0-NEXT: pushl %edi
-; X32-O0-NEXT: .cfi_def_cfa_offset 16
-; X32-O0-NEXT: pushl %esi
-; X32-O0-NEXT: .cfi_def_cfa_offset 20
-; X32-O0-NEXT: subl $8, %esp
-; X32-O0-NEXT: .cfi_def_cfa_offset 28
-; X32-O0-NEXT: .cfi_offset %esi, -20
-; X32-O0-NEXT: .cfi_offset %edi, -16
-; X32-O0-NEXT: .cfi_offset %ebx, -12
-; X32-O0-NEXT: .cfi_offset %ebp, -8
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-O0-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-O0-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-O0-NEXT: movl %edx, %ecx
-; X32-O0-NEXT: andl $1, %ecx
-; X32-O0-NEXT: negl %ecx
-; X32-O0-NEXT: movl %ecx, %edi
-; X32-O0-NEXT: andl $1, %edi
-; X32-O0-NEXT: addl %ecx, %eax
-; X32-O0-NEXT: movl (%esp), %ecx # 4-byte Reload
-; X32-O0-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-O0-NEXT: adcl %edi, %esi
-; X32-O0-NEXT: adcl $0, %ebp
-; X32-O0-NEXT: adcl $0, %ebx
-; X32-O0-NEXT: adcl $0, %edx
-; X32-O0-NEXT: movl %edx, %edi
-; X32-O0-NEXT: shldl $31, %ebx, %edi
-; X32-O0-NEXT: shldl $31, %ebp, %ebx
-; X32-O0-NEXT: shldl $31, %esi, %ebp
-; X32-O0-NEXT: andl $1, %edx
-; X32-O0-NEXT: movl %edx, %esi
-; X32-O0-NEXT: negl %esi
-; X32-O0-NEXT: movl %ebp, (%ecx)
-; X32-O0-NEXT: movl %ebx, 4(%ecx)
-; X32-O0-NEXT: movl %edi, 8(%ecx)
-; X32-O0-NEXT: movl %esi, 12(%ecx)
-; X32-O0-NEXT: # kill: def $dl killed $dl killed $edx
-; X32-O0-NEXT: movb %dl, 16(%ecx)
-; X32-O0-NEXT: addl $8, %esp
-; X32-O0-NEXT: .cfi_def_cfa_offset 20
-; X32-O0-NEXT: popl %esi
-; X32-O0-NEXT: .cfi_def_cfa_offset 16
-; X32-O0-NEXT: popl %edi
-; X32-O0-NEXT: .cfi_def_cfa_offset 12
-; X32-O0-NEXT: popl %ebx
-; X32-O0-NEXT: .cfi_def_cfa_offset 8
-; X32-O0-NEXT: popl %ebp
-; X32-O0-NEXT: .cfi_def_cfa_offset 4
-; X32-O0-NEXT: retl $4
+; X86-O0-LABEL: v_sdiv_i129_v_pow2k:
+; X86-O0: # %bb.0:
+; X86-O0-NEXT: pushl %ebp
+; X86-O0-NEXT: pushl %ebx
+; X86-O0-NEXT: pushl %edi
+; X86-O0-NEXT: pushl %esi
+; X86-O0-NEXT: subl $8, %esp
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-O0-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-O0-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-O0-NEXT: movl %edx, %ecx
+; X86-O0-NEXT: andl $1, %ecx
+; X86-O0-NEXT: negl %ecx
+; X86-O0-NEXT: movl %ecx, %edi
+; X86-O0-NEXT: andl $1, %edi
+; X86-O0-NEXT: addl %ecx, %eax
+; X86-O0-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-O0-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-O0-NEXT: adcl %edi, %esi
+; X86-O0-NEXT: adcl $0, %ebp
+; X86-O0-NEXT: adcl $0, %ebx
+; X86-O0-NEXT: adcl $0, %edx
+; X86-O0-NEXT: movl %edx, %edi
+; X86-O0-NEXT: shldl $31, %ebx, %edi
+; X86-O0-NEXT: shldl $31, %ebp, %ebx
+; X86-O0-NEXT: shldl $31, %esi, %ebp
+; X86-O0-NEXT: andl $1, %edx
+; X86-O0-NEXT: movl %edx, %esi
+; X86-O0-NEXT: negl %esi
+; X86-O0-NEXT: movl %ebp, (%ecx)
+; X86-O0-NEXT: movl %ebx, 4(%ecx)
+; X86-O0-NEXT: movl %edi, 8(%ecx)
+; X86-O0-NEXT: movl %esi, 12(%ecx)
+; X86-O0-NEXT: # kill: def $dl killed $dl killed $edx
+; X86-O0-NEXT: movb %dl, 16(%ecx)
+; X86-O0-NEXT: addl $8, %esp
+; X86-O0-NEXT: popl %esi
+; X86-O0-NEXT: popl %edi
+; X86-O0-NEXT: popl %ebx
+; X86-O0-NEXT: popl %ebp
+; X86-O0-NEXT: retl $4
%div = sdiv i129 %lhs, 8589934592
ret i129 %div
}
-define i129 @v_sdiv_exact_i129_v_pow2k(i129 %lhs) {
+define i129 @v_sdiv_exact_i129_v_pow2k(i129 %lhs) nounwind {
; X64-LABEL: v_sdiv_exact_i129_v_pow2k:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
@@ -199,116 +176,93 @@ define i129 @v_sdiv_exact_i129_v_pow2k(i129 %lhs) {
; X64-O0-NEXT: shldq $31, %rsi, %rdx
; X64-O0-NEXT: retq
;
-; X32-LABEL: v_sdiv_exact_i129_v_pow2k:
-; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: pushl %edi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 16
-; X32-NEXT: .cfi_offset %esi, -16
-; X32-NEXT: .cfi_offset %edi, -12
-; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, %ebx
-; X32-NEXT: andl $1, %ebx
-; X32-NEXT: negl %ebx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl %ebx, %edi
-; X32-NEXT: andl $1, %edi
-; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl %ecx, %ebx
-; X32-NEXT: shldl $31, %edx, %ebx
-; X32-NEXT: shldl $31, %esi, %edx
-; X32-NEXT: shldl $31, %edi, %esi
-; X32-NEXT: andl $1, %ecx
-; X32-NEXT: movl %ecx, %edi
-; X32-NEXT: negl %edi
-; X32-NEXT: movl %esi, (%eax)
-; X32-NEXT: movl %edx, 4(%eax)
-; X32-NEXT: movl %ebx, 8(%eax)
-; X32-NEXT: movl %edi, 12(%eax)
-; X32-NEXT: movb %cl, 16(%eax)
-; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: popl %edi
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: popl %ebx
-; X32-NEXT: .cfi_def_cfa_offset 4
-; X32-NEXT: retl $4
+; X86-LABEL: v_sdiv_exact_i129_v_pow2k:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: andl $1, %ebx
+; X86-NEXT: negl %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: andl $1, %edi
+; X86-NEXT: addl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: shldl $31, %edx, %ebx
+; X86-NEXT: shldl $31, %esi, %edx
+; X86-NEXT: shldl $31, %edi, %esi
+; X86-NEXT: andl $1, %ecx
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: negl %edi
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movb %cl, 16(%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl $4
;
-; X32-O0-LABEL: v_sdiv_exact_i129_v_pow2k:
-; X32-O0: # %bb.0:
-; X32-O0-NEXT: pushl %ebp
-; X32-O0-NEXT: .cfi_def_cfa_offset 8
-; X32-O0-NEXT: pushl %ebx
-; X32-O0-NEXT: .cfi_def_cfa_offset 12
-; X32-O0-NEXT: pushl %edi
-; X32-O0-NEXT: .cfi_def_cfa_offset 16
-; X32-O0-NEXT: pushl %esi
-; X32-O0-NEXT: .cfi_def_cfa_offset 20
-; X32-O0-NEXT: subl $8, %esp
-; X32-O0-NEXT: .cfi_def_cfa_offset 28
-; X32-O0-NEXT: .cfi_offset %esi, -20
-; X32-O0-NEXT: .cfi_offset %edi, -16
-; X32-O0-NEXT: .cfi_offset %ebx, -12
-; X32-O0-NEXT: .cfi_offset %ebp, -8
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-O0-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-O0-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-O0-NEXT: movl %edx, %ecx
-; X32-O0-NEXT: andl $1, %ecx
-; X32-O0-NEXT: negl %ecx
-; X32-O0-NEXT: movl %ecx, %edi
-; X32-O0-NEXT: andl $1, %edi
-; X32-O0-NEXT: addl %ecx, %eax
-; X32-O0-NEXT: movl (%esp), %ecx # 4-byte Reload
-; X32-O0-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-O0-NEXT: adcl %edi, %esi
-; X32-O0-NEXT: adcl $0, %ebp
-; X32-O0-NEXT: adcl $0, %ebx
-; X32-O0-NEXT: adcl $0, %edx
-; X32-O0-NEXT: movl %edx, %edi
-; X32-O0-NEXT: shldl $31, %ebx, %edi
-; X32-O0-NEXT: shldl $31, %ebp, %ebx
-; X32-O0-NEXT: shldl $31, %esi, %ebp
-; X32-O0-NEXT: andl $1, %edx
-; X32-O0-NEXT: movl %edx, %esi
-; X32-O0-NEXT: negl %esi
-; X32-O0-NEXT: movl %ebp, (%ecx)
-; X32-O0-NEXT: movl %ebx, 4(%ecx)
-; X32-O0-NEXT: movl %edi, 8(%ecx)
-; X32-O0-NEXT: movl %esi, 12(%ecx)
-; X32-O0-NEXT: # kill: def $dl killed $dl killed $edx
-; X32-O0-NEXT: movb %dl, 16(%ecx)
-; X32-O0-NEXT: addl $8, %esp
-; X32-O0-NEXT: .cfi_def_cfa_offset 20
-; X32-O0-NEXT: popl %esi
-; X32-O0-NEXT: .cfi_def_cfa_offset 16
-; X32-O0-NEXT: popl %edi
-; X32-O0-NEXT: .cfi_def_cfa_offset 12
-; X32-O0-NEXT: popl %ebx
-; X32-O0-NEXT: .cfi_def_cfa_offset 8
-; X32-O0-NEXT: popl %ebp
-; X32-O0-NEXT: .cfi_def_cfa_offset 4
-; X32-O0-NEXT: retl $4
+; X86-O0-LABEL: v_sdiv_exact_i129_v_pow2k:
+; X86-O0: # %bb.0:
+; X86-O0-NEXT: pushl %ebp
+; X86-O0-NEXT: pushl %ebx
+; X86-O0-NEXT: pushl %edi
+; X86-O0-NEXT: pushl %esi
+; X86-O0-NEXT: subl $8, %esp
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-O0-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-O0-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-O0-NEXT: movl %edx, %ecx
+; X86-O0-NEXT: andl $1, %ecx
+; X86-O0-NEXT: negl %ecx
+; X86-O0-NEXT: movl %ecx, %edi
+; X86-O0-NEXT: andl $1, %edi
+; X86-O0-NEXT: addl %ecx, %eax
+; X86-O0-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-O0-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-O0-NEXT: adcl %edi, %esi
+; X86-O0-NEXT: adcl $0, %ebp
+; X86-O0-NEXT: adcl $0, %ebx
+; X86-O0-NEXT: adcl $0, %edx
+; X86-O0-NEXT: movl %edx, %edi
+; X86-O0-NEXT: shldl $31, %ebx, %edi
+; X86-O0-NEXT: shldl $31, %ebp, %ebx
+; X86-O0-NEXT: shldl $31, %esi, %ebp
+; X86-O0-NEXT: andl $1, %edx
+; X86-O0-NEXT: movl %edx, %esi
+; X86-O0-NEXT: negl %esi
+; X86-O0-NEXT: movl %ebp, (%ecx)
+; X86-O0-NEXT: movl %ebx, 4(%ecx)
+; X86-O0-NEXT: movl %edi, 8(%ecx)
+; X86-O0-NEXT: movl %esi, 12(%ecx)
+; X86-O0-NEXT: # kill: def $dl killed $dl killed $edx
+; X86-O0-NEXT: movb %dl, 16(%ecx)
+; X86-O0-NEXT: addl $8, %esp
+; X86-O0-NEXT: popl %esi
+; X86-O0-NEXT: popl %edi
+; X86-O0-NEXT: popl %ebx
+; X86-O0-NEXT: popl %ebp
+; X86-O0-NEXT: retl $4
%div = sdiv exact i129 %lhs, 8589934592
ret i129 %div
}
-define i129 @v_udiv_i129_v_pow2k(i129 %lhs) {
+define i129 @v_udiv_i129_v_pow2k(i129 %lhs) nounwind {
; X64-LABEL: v_udiv_i129_v_pow2k:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
@@ -330,71 +284,56 @@ define i129 @v_udiv_i129_v_pow2k(i129 %lhs) {
; X64-O0-NEXT: # kill: def $rcx killed $ecx
; X64-O0-NEXT: retq
;
-; X32-LABEL: v_udiv_i129_v_pow2k:
-; X32: # %bb.0:
-; X32-NEXT: pushl %edi
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %edi, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: shrdl $1, %esi, %edx
-; X32-NEXT: shldl $31, %edi, %ecx
-; X32-NEXT: shldl $31, %esi, %edi
-; X32-NEXT: movl %ecx, 8(%eax)
-; X32-NEXT: movl %edi, 4(%eax)
-; X32-NEXT: movl %edx, (%eax)
-; X32-NEXT: movl $0, 12(%eax)
-; X32-NEXT: movb $0, 16(%eax)
-; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: popl %edi
-; X32-NEXT: .cfi_def_cfa_offset 4
-; X32-NEXT: retl $4
+; X86-LABEL: v_udiv_i129_v_pow2k:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: shrdl $1, %esi, %edx
+; X86-NEXT: shldl $31, %edi, %ecx
+; X86-NEXT: shldl $31, %esi, %edi
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: movl $0, 12(%eax)
+; X86-NEXT: movb $0, 16(%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
;
-; X32-O0-LABEL: v_udiv_i129_v_pow2k:
-; X32-O0: # %bb.0:
-; X32-O0-NEXT: pushl %ebx
-; X32-O0-NEXT: .cfi_def_cfa_offset 8
-; X32-O0-NEXT: pushl %edi
-; X32-O0-NEXT: .cfi_def_cfa_offset 12
-; X32-O0-NEXT: pushl %esi
-; X32-O0-NEXT: .cfi_def_cfa_offset 16
-; X32-O0-NEXT: .cfi_offset %esi, -16
-; X32-O0-NEXT: .cfi_offset %edi, -12
-; X32-O0-NEXT: .cfi_offset %ebx, -8
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-O0-NEXT: movl %ecx, %eax
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-O0-NEXT: shldl $31, %esi, %edx
-; X32-O0-NEXT: shldl $31, %edi, %esi
-; X32-O0-NEXT: shldl $31, %ebx, %edi
-; X32-O0-NEXT: movl %edi, (%ecx)
-; X32-O0-NEXT: movl %esi, 4(%ecx)
-; X32-O0-NEXT: movl %edx, 8(%ecx)
-; X32-O0-NEXT: movl $0, 12(%ecx)
-; X32-O0-NEXT: movb $0, 16(%ecx)
-; X32-O0-NEXT: popl %esi
-; X32-O0-NEXT: .cfi_def_cfa_offset 12
-; X32-O0-NEXT: popl %edi
-; X32-O0-NEXT: .cfi_def_cfa_offset 8
-; X32-O0-NEXT: popl %ebx
-; X32-O0-NEXT: .cfi_def_cfa_offset 4
-; X32-O0-NEXT: retl $4
+; X86-O0-LABEL: v_udiv_i129_v_pow2k:
+; X86-O0: # %bb.0:
+; X86-O0-NEXT: pushl %ebx
+; X86-O0-NEXT: pushl %edi
+; X86-O0-NEXT: pushl %esi
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-O0-NEXT: movl %ecx, %eax
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-O0-NEXT: shldl $31, %esi, %edx
+; X86-O0-NEXT: shldl $31, %edi, %esi
+; X86-O0-NEXT: shldl $31, %ebx, %edi
+; X86-O0-NEXT: movl %edi, (%ecx)
+; X86-O0-NEXT: movl %esi, 4(%ecx)
+; X86-O0-NEXT: movl %edx, 8(%ecx)
+; X86-O0-NEXT: movl $0, 12(%ecx)
+; X86-O0-NEXT: movb $0, 16(%ecx)
+; X86-O0-NEXT: popl %esi
+; X86-O0-NEXT: popl %edi
+; X86-O0-NEXT: popl %ebx
+; X86-O0-NEXT: retl $4
%div = udiv i129 %lhs, 8589934592
ret i129 %div
}
-define i129 @v_udiv_exact_i129_v_pow2k(i129 %lhs) {
+define i129 @v_udiv_exact_i129_v_pow2k(i129 %lhs) nounwind {
; X64-LABEL: v_udiv_exact_i129_v_pow2k:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
@@ -416,66 +355,51 @@ define i129 @v_udiv_exact_i129_v_pow2k(i129 %lhs) {
; X64-O0-NEXT: # kill: def $rcx killed $ecx
; X64-O0-NEXT: retq
;
-; X32-LABEL: v_udiv_exact_i129_v_pow2k:
-; X32: # %bb.0:
-; X32-NEXT: pushl %edi
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: pushl %esi
-; X32-NEXT: .cfi_def_cfa_offset 12
-; X32-NEXT: .cfi_offset %esi, -12
-; X32-NEXT: .cfi_offset %edi, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: shrdl $1, %esi, %edx
-; X32-NEXT: shldl $31, %edi, %ecx
-; X32-NEXT: shldl $31, %esi, %edi
-; X32-NEXT: movl %ecx, 8(%eax)
-; X32-NEXT: movl %edi, 4(%eax)
-; X32-NEXT: movl %edx, (%eax)
-; X32-NEXT: movl $0, 12(%eax)
-; X32-NEXT: movb $0, 16(%eax)
-; X32-NEXT: popl %esi
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: popl %edi
-; X32-NEXT: .cfi_def_cfa_offset 4
-; X32-NEXT: retl $4
+; X86-LABEL: v_udiv_exact_i129_v_pow2k:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: shrdl $1, %esi, %edx
+; X86-NEXT: shldl $31, %edi, %ecx
+; X86-NEXT: shldl $31, %esi, %edi
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: movl $0, 12(%eax)
+; X86-NEXT: movb $0, 16(%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
;
-; X32-O0-LABEL: v_udiv_exact_i129_v_pow2k:
-; X32-O0: # %bb.0:
-; X32-O0-NEXT: pushl %ebx
-; X32-O0-NEXT: .cfi_def_cfa_offset 8
-; X32-O0-NEXT: pushl %edi
-; X32-O0-NEXT: .cfi_def_cfa_offset 12
-; X32-O0-NEXT: pushl %esi
-; X32-O0-NEXT: .cfi_def_cfa_offset 16
-; X32-O0-NEXT: .cfi_offset %esi, -16
-; X32-O0-NEXT: .cfi_offset %edi, -12
-; X32-O0-NEXT: .cfi_offset %ebx, -8
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-O0-NEXT: movl %ecx, %eax
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-O0-NEXT: shldl $31, %esi, %edx
-; X32-O0-NEXT: shldl $31, %edi, %esi
-; X32-O0-NEXT: shldl $31, %ebx, %edi
-; X32-O0-NEXT: movl %edi, (%ecx)
-; X32-O0-NEXT: movl %esi, 4(%ecx)
-; X32-O0-NEXT: movl %edx, 8(%ecx)
-; X32-O0-NEXT: movl $0, 12(%ecx)
-; X32-O0-NEXT: movb $0, 16(%ecx)
-; X32-O0-NEXT: popl %esi
-; X32-O0-NEXT: .cfi_def_cfa_offset 12
-; X32-O0-NEXT: popl %edi
-; X32-O0-NEXT: .cfi_def_cfa_offset 8
-; X32-O0-NEXT: popl %ebx
-; X32-O0-NEXT: .cfi_def_cfa_offset 4
-; X32-O0-NEXT: retl $4
+; X86-O0-LABEL: v_udiv_exact_i129_v_pow2k:
+; X86-O0: # %bb.0:
+; X86-O0-NEXT: pushl %ebx
+; X86-O0-NEXT: pushl %edi
+; X86-O0-NEXT: pushl %esi
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-O0-NEXT: movl %ecx, %eax
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-O0-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-O0-NEXT: shldl $31, %esi, %edx
+; X86-O0-NEXT: shldl $31, %edi, %esi
+; X86-O0-NEXT: shldl $31, %ebx, %edi
+; X86-O0-NEXT: movl %edi, (%ecx)
+; X86-O0-NEXT: movl %esi, 4(%ecx)
+; X86-O0-NEXT: movl %edx, 8(%ecx)
+; X86-O0-NEXT: movl $0, 12(%ecx)
+; X86-O0-NEXT: movb $0, 16(%ecx)
+; X86-O0-NEXT: popl %esi
+; X86-O0-NEXT: popl %edi
+; X86-O0-NEXT: popl %ebx
+; X86-O0-NEXT: retl $4
%div = udiv exact i129 %lhs, 8589934592
ret i129 %div
}
More information about the llvm-commits mailing list