[llvm] [X86] Align f128 and i128 to 16 bytes when passing on x86-32 (PR #138092)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 05:25:07 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Trevor Gross (tgross35)
<details>
<summary>Changes</summary>
The i386 psABI specifies that `__float128` has 16 byte alignment and
must be passed on the stack; however, LLVM currently stores it in a
stack slot that has an offset of 4. Add a custom lowering to correct
this alignment to 16 bytes.
i386 does not specify an `__int128`, but it seems reasonable to keep the
same behavior as `__float128` so this is changed as well.
Fixes: https://github.com/llvm/llvm-project/issues/77401
---
Patch is 551.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138092.diff
52 Files Affected:
- (modified) llvm/docs/ReleaseNotes.md (+2)
- (modified) llvm/lib/Target/X86/X86CallingConv.cpp (+32)
- (modified) llvm/lib/Target/X86/X86CallingConv.td (+5)
- (modified) llvm/lib/Target/X86/X86ISelLoweringCall.cpp (+12-3)
- (modified) llvm/test/CodeGen/X86/abds-neg.ll (+218-192)
- (modified) llvm/test/CodeGen/X86/abds.ll (+208-182)
- (modified) llvm/test/CodeGen/X86/abdu-neg.ll (+149-133)
- (modified) llvm/test/CodeGen/X86/abdu.ll (+120-105)
- (modified) llvm/test/CodeGen/X86/abs.ll (+32-23)
- (modified) llvm/test/CodeGen/X86/add-sub-bool.ll (+15-10)
- (modified) llvm/test/CodeGen/X86/all-ones-vector.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/arg-copy-elide.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/avx512fp16-cvt.ll (+29-13)
- (modified) llvm/test/CodeGen/X86/bitselect.ll (+29-26)
- (modified) llvm/test/CodeGen/X86/bsf.ll (+78-66)
- (modified) llvm/test/CodeGen/X86/bsr.ll (+82-76)
- (modified) llvm/test/CodeGen/X86/bswap-wide-int.ll (+20-10)
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll (+18-18)
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll (+47-47)
- (modified) llvm/test/CodeGen/X86/fp128-cast-strict.ll (+52-40)
- (modified) llvm/test/CodeGen/X86/fp128-cast.ll (+71-54)
- (modified) llvm/test/CodeGen/X86/fp128-libcalls-strict.ll (+1260-800)
- (modified) llvm/test/CodeGen/X86/fp128-libcalls.ll (+1121-652)
- (modified) llvm/test/CodeGen/X86/fshl.ll (+104-81)
- (modified) llvm/test/CodeGen/X86/fshr.ll (+91-79)
- (modified) llvm/test/CodeGen/X86/funnel-shift.ll (+44-30)
- (modified) llvm/test/CodeGen/X86/i128-add.ll (+14-9)
- (modified) llvm/test/CodeGen/X86/i128-fp128-abi.ll (+471-235)
- (modified) llvm/test/CodeGen/X86/i128-sdiv.ll (+348-27)
- (modified) llvm/test/CodeGen/X86/i128-udiv.ll (+590-7)
- (modified) llvm/test/CodeGen/X86/iabs.ll (+23-20)
- (modified) llvm/test/CodeGen/X86/icmp-shift-opt.ll (+69-33)
- (modified) llvm/test/CodeGen/X86/mul128.ll (+46-51)
- (modified) llvm/test/CodeGen/X86/neg-abs.ll (+32-23)
- (modified) llvm/test/CodeGen/X86/popcnt.ll (+275-210)
- (modified) llvm/test/CodeGen/X86/pr46004.ll (+19)
- (modified) llvm/test/CodeGen/X86/scalar-fp-to-i32.ll (+54-22)
- (modified) llvm/test/CodeGen/X86/scalar-fp-to-i64.ll (+54-22)
- (modified) llvm/test/CodeGen/X86/scmp.ll (+21-18)
- (modified) llvm/test/CodeGen/X86/sdiv_fix.ll (+50-49)
- (modified) llvm/test/CodeGen/X86/sdiv_fix_sat.ll (+222-218)
- (modified) llvm/test/CodeGen/X86/shift-combine.ll (+12-2)
- (modified) llvm/test/CodeGen/X86/shift-i128.ll (+52-20)
- (modified) llvm/test/CodeGen/X86/smax.ll (+42-36)
- (modified) llvm/test/CodeGen/X86/smin.ll (+43-38)
- (modified) llvm/test/CodeGen/X86/ucmp.ll (+19-15)
- (modified) llvm/test/CodeGen/X86/udiv_fix.ll (+15-13)
- (modified) llvm/test/CodeGen/X86/udiv_fix_sat.ll (+15-13)
- (modified) llvm/test/CodeGen/X86/umax.ll (+72-63)
- (modified) llvm/test/CodeGen/X86/umin.ll (+43-38)
- (modified) llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/wide-integer-cmp.ll (+8-6)
``````````diff
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index daf822388a2ff..e91460d3a551c 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -228,6 +228,8 @@ Changes to the X86 Backend
--------------------------
* `fp128` will now use `*f128` libcalls on 32-bit GNU targets as well.
+* On x86-32, `fp128` and `i128` are now passed with the expected 16-byte stack
+ alignment.
Changes to the OCaml bindings
-----------------------------
diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp
index 0b4c63f7a81f7..eb39259f7166b 100644
--- a/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -374,5 +374,37 @@ static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
+/// Special handling for i128 and fp128: on x86-32, i128 and fp128 get legalized
+/// as four i32s, but fp128 must be passed on the stack with 16-byte alignment.
+/// Technically only fp128 has a specified ABI, but it makes sense to handle
+/// i128 the same until we hear differently.
+static bool CC_X86_32_I128_FP128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ assert(ValVT == MVT::i32 && "Should have i32 parts");
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ unsigned NumRegs = PendingMembers.size();
+ assert(NumRegs == 4 && "Should have two parts");
+
+ int64_t Offset = State.AllocateStack(16, Align(16));
+ PendingMembers[0].convertToMem(Offset);
+ PendingMembers[1].convertToMem(Offset + 4);
+ PendingMembers[2].convertToMem(Offset + 8);
+ PendingMembers[3].convertToMem(Offset + 12);
+
+ State.addLoc(PendingMembers[0]);
+ State.addLoc(PendingMembers[1]);
+ State.addLoc(PendingMembers[2]);
+ State.addLoc(PendingMembers[3]);
+ PendingMembers.clear();
+ return true;
+}
+
// Provides entry points of CC_X86 and RetCC_X86.
#include "X86GenCallingConv.inc"
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 823e0caa02262..f020e0b55141c 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -859,6 +859,11 @@ def CC_X86_32_C : CallingConv<[
// The 'nest' parameter, if any, is passed in ECX.
CCIfNest<CCAssignToReg<[ECX]>>,
+ // i128 and fp128 need to be passed on the stack with a higher alignment than
+ // their legal types. Handle this with a custom function.
+ CCIfType<[i32],
+ CCIfConsecutiveRegs<CCCustom<"CC_X86_32_I128_FP128">>>,
+
// On swifttailcc pass swiftself in ECX.
CCIfCC<"CallingConv::SwiftTail",
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[ECX]>>>>,
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 9ad355311527b..b4639ac2577e8 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -237,9 +237,18 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const {
- // i128 split into i64 needs to be allocated to two consecutive registers,
- // or spilled to the stack as a whole.
- return Ty->isIntegerTy(128);
+ // On x86-64 i128 is split into two i64s and needs to be allocated to two
+ // consecutive registers, or spilled to the stack as a whole. On x86-32 i128
+ // is split to four i32s and never actually passed in registers, but we use
+ // the consecutive register mark to match it in TableGen.
+ if (Ty->isIntegerTy(128))
+ return true;
+
+ // On x86-32, fp128 acts the same as i128.
+ if (Subtarget.is32Bit() && Ty->isFP128Ty())
+ return true;
+
+ return false;
}
/// Helper for getByValTypeAlignment to determine
diff --git a/llvm/test/CodeGen/X86/abds-neg.ll b/llvm/test/CodeGen/X86/abds-neg.ll
index f6d66ab47ce05..2911edfbfd409 100644
--- a/llvm/test/CodeGen/X86/abds-neg.ll
+++ b/llvm/test/CodeGen/X86/abds-neg.ll
@@ -367,44 +367,49 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl %edi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll %eax, %esi
-; X86-NEXT: cmovll %ebx, %edi
-; X86-NEXT: cmovll %ebp, %edx
-; X86-NEXT: cmovll (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %eax
+; X86-NEXT: sbbl 32(%ebp), %edx
+; X86-NEXT: sbbl 36(%ebp), %esi
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %ebx, %edx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: xorl %edi, %edi
; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %ebx, 4(%edx)
+; X86-NEXT: movl %eax, 8(%edx)
+; X86-NEXT: movl %edi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -438,44 +443,49 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_ext_i128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl %edi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll %eax, %esi
-; X86-NEXT: cmovll %ebx, %edi
-; X86-NEXT: cmovll %ebp, %edx
-; X86-NEXT: cmovll (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %eax
+; X86-NEXT: sbbl 32(%ebp), %edx
+; X86-NEXT: sbbl 36(%ebp), %esi
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %ebx, %edx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: xorl %edi, %edi
; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %ebx, 4(%edx)
+; X86-NEXT: movl %eax, 8(%edx)
+; X86-NEXT: movl %edi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -639,55 +649,59 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_minmax_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %edi, %ecx
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: cmovll %edx, %ecx
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovll %esi, %edx
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl (%esp), %esi # 4-byte Reload
-; X86-NEXT: sbbl %edi, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %ebp, 8(%eax)
-; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: cmpl %esi, %edi
+; X86-NEXT: sbbl 44(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: cmovll %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: cmovll 32(%ebp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: cmovll 28(%ebp), %eax
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: cmovll %edi, %ecx
+; X86-NEXT: cmpl %edi, %esi
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: sbbl 28(%ebp), %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl 32(%ebp), %edi
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: sbbl 36(%ebp), %edi
+; X86-NEXT: cmovll 36(%ebp), %ebx
+; X86-NEXT: cmovll 32(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: cmovll 28(%ebp), %edi
+; X86-NEXT: cmovll 24(%ebp), %esi
+; X86-NEXT: subl %esi, %ecx
+; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %eax, 4(%edx)
+; X86-NEXT: movl %edi, 8(%edx)
+; X86-NEXT: movl %esi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -848,37 +862,41 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_cmp_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovgel (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovgel %ebx, %esi
-; X86-NEXT: cmovgel %ebp, %ecx
-; X86-NEXT: cmovgel %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 40(%ebp), %eax
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 48(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: movl 52(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edx
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %ebx
+; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovgel %edi, %esi
+; X86-NEXT: cmovgel %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1118,35 +1136,39 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_subnsw_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: subl %edi, %ebp
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: subl 40(%ebp), %esi
+; X86-NEXT: sbbl 44(%ebp), %edx
+; X86-NEXT: sbbl 48(%ebp), %ecx
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: xorl %edi, %eax
+; X86-NEXT: xorl %edi, %ecx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: subl %esi, %ebx
+; X86-NEXT: movl %edi, %esi
; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sbbl %ecx, %ebx
-; X86-NEXT: movl %ebp, (%eax)
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1175,35 +1197,39 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_subnsw_i128_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: subl %edi, %ebp
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: m...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/138092
More information about the llvm-commits
mailing list