[llvm] 2068b1b - [X86] Fix ABI for passing after i128 (#124134)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 24 06:31:58 PST 2025
Author: Nikita Popov
Date: 2025-01-24T15:31:53+01:00
New Revision: 2068b1ba031e258a6448bea372005d19692c802a
URL: https://github.com/llvm/llvm-project/commit/2068b1ba031e258a6448bea372005d19692c802a
DIFF: https://github.com/llvm/llvm-project/commit/2068b1ba031e258a6448bea372005d19692c802a.diff
LOG: [X86] Fix ABI for passing after i128 (#124134)
If we're passing an i128 value and we no longer have enough argument
registers (only r9 unallocated), the value gets passed via the stack.
However, r9 is still allocated as a shadow register, which means that a
following i64 argument will not use it. This doesn't match the x86-64
psABI.
Fix this by marking i128 arguments as requiring consecutive registers,
and then adding a custom CC lowering that allocates both parts of the
i128 at the same time, either to registers or to the stack, without
reserving a shadow register.
Fixes https://github.com/llvm/llvm-project/issues/123935.
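As a rough illustration (a hypothetical C signature, mirroring the
trailing_arg_on_stack test in llvm/test/CodeGen/X86/i128-abi.ll below):
the first five i64 arguments consume rdi, rsi, rdx, rcx and r8, the
__int128 no longer fits in the remaining registers and is passed on the
stack, and per the psABI the trailing i64 should still go in r9.

/* Hypothetical example; the function and parameter names are
   illustrative only. */
long trailing_arg(long a0, long a1, long a2, long a3, long a4,
                  __int128 a5, long a6) {
  /* a0..a4 occupy rdi, rsi, rdx, rcx and r8. a5 cannot be split
     between r9 and memory, so it is passed entirely on the stack,
     and r9 is not reserved as a shadow register. a6 should
     therefore be passed in r9, not on the stack. */
  return a6;
}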
Added:
Modified:
llvm/lib/Target/X86/X86CallingConv.cpp
llvm/lib/Target/X86/X86CallingConv.td
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86ISelLoweringCall.cpp
llvm/test/CodeGen/X86/addcarry.ll
llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll
llvm/test/CodeGen/X86/avgflooru-i128.ll
llvm/test/CodeGen/X86/fmuladd-soft-float.ll
llvm/test/CodeGen/X86/i128-abi.ll
llvm/test/CodeGen/X86/sadd_sat_vec.ll
llvm/test/CodeGen/X86/ssub_sat_vec.ll
llvm/test/CodeGen/X86/subcarry.ll
llvm/test/CodeGen/X86/uadd_sat_vec.ll
llvm/test/CodeGen/X86/usub_sat_vec.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp
index 7359ef341dde58..0b4c63f7a81f78 100644
--- a/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -340,5 +340,39 @@ static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return false;
}
+/// Special handling for i128: Either allocate the value to two consecutive
+/// i64 registers, or to the stack. Do not partially allocate in registers,
+/// and do not reserve any registers when allocating to the stack.
+static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ assert(ValVT == MVT::i64 && "Should have i64 parts");
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ unsigned NumRegs = PendingMembers.size();
+ assert(NumRegs == 2 && "Should have two parts");
+
+ static const MCPhysReg Regs[] = {X86::RDI, X86::RSI, X86::RDX,
+ X86::RCX, X86::R8, X86::R9};
+ ArrayRef<MCPhysReg> Allocated = State.AllocateRegBlock(Regs, NumRegs);
+ if (!Allocated.empty()) {
+ PendingMembers[0].convertToReg(Allocated[0]);
+ PendingMembers[1].convertToReg(Allocated[1]);
+ } else {
+ int64_t Offset = State.AllocateStack(16, Align(16));
+ PendingMembers[0].convertToMem(Offset);
+ PendingMembers[1].convertToMem(Offset + 8);
+ }
+ State.addLoc(PendingMembers[0]);
+ State.addLoc(PendingMembers[1]);
+ PendingMembers.clear();
+ return true;
+}
+
// Provides entry points of CC_X86 and RetCC_X86.
#include "X86GenCallingConv.inc"
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 91af111db8cda5..72b103b0bb0c50 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -548,11 +548,9 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
// i128 can be either passed in two i64 registers, or on the stack, but
- // not split across register and stack. As such, do not allow using R9
- // for a split i64.
+ // not split across register and stack. Handle this with a custom function.
CCIfType<[i64],
- CCIfSplit<CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
- CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [R9]>>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_X86_64_I128">>>,
CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index e07bcd989c5188..fe79fefeed631c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1604,6 +1604,10 @@ namespace llvm {
LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const override;
+ bool functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg,
+ const DataLayout &DL) const override;
+
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool supportSwiftError() const override;
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 4a4fd246cb7cdf..6835c7e336a5cb 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -233,6 +233,14 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
return VT.changeVectorElementTypeToInteger();
}
+bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg,
+ const DataLayout &DL) const {
+ // i128 split into i64 needs to be allocated to two consecutive registers,
+ // or spilled to the stack as a whole.
+ return Ty->isIntegerTy(128);
+}
+
/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
index f8d32fc2d29252..97894db1188e28 100644
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -49,7 +49,7 @@ define i256 @add256(i256 %a, i256 %b) nounwind {
; CHECK-LABEL: add256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: addq %r9, %rsi
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8
diff --git a/llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll b/llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll
index deca130a04ff04..bd764c2edef29d 100644
--- a/llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll
+++ b/llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll
@@ -31,15 +31,15 @@ define <2 x i128> @flag_copy_2(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK-NEXT: movq %r8, %rdi
; CHECK-NEXT: {nf} sarq $63, %rdi
; CHECK-NEXT: cmovoq %rdi, %rcx
-; CHECK-NEXT: movabsq $-9223372036854775808, %r9 # imm = 0x8000000000000000
-; CHECK-NEXT: {nf} xorq %r9, %rdi
+; CHECK-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
+; CHECK-NEXT: {nf} xorq %r10, %rdi
; CHECK-NEXT: cmovnoq %r8, %rdi
-; CHECK-NEXT: subq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: subq %r9, %rsi
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: {nf} sarq $63, %r8
; CHECK-NEXT: cmovoq %r8, %rsi
-; CHECK-NEXT: {nf} xorq %r9, %r8
+; CHECK-NEXT: {nf} xorq %r10, %r8
; CHECK-NEXT: cmovnoq %rdx, %r8
; CHECK-NEXT: movq %rcx, 16(%rax)
; CHECK-NEXT: movq %rsi, (%rax)
diff --git a/llvm/test/CodeGen/X86/avgflooru-i128.ll b/llvm/test/CodeGen/X86/avgflooru-i128.ll
index da16a7da48ca69..11e886e25ba4ea 100644
--- a/llvm/test/CodeGen/X86/avgflooru-i128.ll
+++ b/llvm/test/CodeGen/X86/avgflooru-i128.ll
@@ -119,7 +119,7 @@ define <2 x i128> @avgflooru_i128_vec(<2 x i128> %x, <2 x i128> %y) {
; CHECK-LABEL: avgflooru_i128_vec:
; CHECK: # %bb.0: # %start
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: addq %r9, %rsi
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: setb %dil
; CHECK-NEXT: movzbl %dil, %edi
diff --git a/llvm/test/CodeGen/X86/fmuladd-soft-float.ll b/llvm/test/CodeGen/X86/fmuladd-soft-float.ll
index ccb2f37590b0ad..cbdfa32ed46274 100644
--- a/llvm/test/CodeGen/X86/fmuladd-soft-float.ll
+++ b/llvm/test/CodeGen/X86/fmuladd-soft-float.ll
@@ -1555,30 +1555,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-NEXT: .cfi_offset %r14, -32
; SOFT-FLOAT-64-NEXT: .cfi_offset %r15, -24
; SOFT-FLOAT-64-NEXT: .cfi_offset %rbp, -16
+; SOFT-FLOAT-64-NEXT: movq %r9, %rbp
; SOFT-FLOAT-64-NEXT: movq %rcx, %r14
; SOFT-FLOAT-64-NEXT: movq %rdx, %r15
-; SOFT-FLOAT-64-NEXT: movq %rsi, %r12
+; SOFT-FLOAT-64-NEXT: movq %rsi, %r13
; SOFT-FLOAT-64-NEXT: movq %rdi, %rbx
-; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: movq %r8, %rdi
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
-; SOFT-FLOAT-64-NEXT: movq %rax, %r13
+; SOFT-FLOAT-64-NEXT: movq %rax, %r12
; SOFT-FLOAT-64-NEXT: movq %r14, %rdi
-; SOFT-FLOAT-64-NEXT: movq %rbp, %rsi
+; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %r14
; SOFT-FLOAT-64-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %r15
-; SOFT-FLOAT-64-NEXT: movq %r12, %rdi
-; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; SOFT-FLOAT-64-NEXT: movq %r13, %rdi
+; SOFT-FLOAT-64-NEXT: movq %rbp, %rsi
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
-; SOFT-FLOAT-64-NEXT: movq %rax, %r12
+; SOFT-FLOAT-64-NEXT: movq %rax, %r13
; SOFT-FLOAT-64-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
@@ -1587,13 +1587,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %r14
-; SOFT-FLOAT-64-NEXT: movq %r13, %rdi
+; SOFT-FLOAT-64-NEXT: movq %r12, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, 24(%rbx)
; SOFT-FLOAT-64-NEXT: movq %r14, 16(%rbx)
; SOFT-FLOAT-64-NEXT: movq %r15, 8(%rbx)
-; SOFT-FLOAT-64-NEXT: movq %r12, (%rbx)
+; SOFT-FLOAT-64-NEXT: movq %r13, (%rbx)
; SOFT-FLOAT-64-NEXT: movq %rbx, %rax
; SOFT-FLOAT-64-NEXT: addq $8, %rsp
; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56
@@ -1633,30 +1633,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r14, -32
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r15, -24
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbp, -16
+; SOFT-FLOAT-64-FMA-NEXT: movq %r9, %rbp
; SOFT-FLOAT-64-FMA-NEXT: movq %rcx, %r14
; SOFT-FLOAT-64-FMA-NEXT: movq %rdx, %r15
-; SOFT-FLOAT-64-FMA-NEXT: movq %rsi, %r12
+; SOFT-FLOAT-64-FMA-NEXT: movq %rsi, %r13
; SOFT-FLOAT-64-FMA-NEXT: movq %rdi, %rbx
-; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: movq %r8, %rdi
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
-; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r13
+; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r12
; SOFT-FLOAT-64-FMA-NEXT: movq %r14, %rdi
-; SOFT-FLOAT-64-FMA-NEXT: movq %rbp, %rsi
+; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r15
-; SOFT-FLOAT-64-FMA-NEXT: movq %r12, %rdi
-; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; SOFT-FLOAT-64-FMA-NEXT: movq %r13, %rdi
+; SOFT-FLOAT-64-FMA-NEXT: movq %rbp, %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
-; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r12
+; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r13
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
@@ -1665,13 +1665,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14
-; SOFT-FLOAT-64-FMA-NEXT: movq %r13, %rdi
+; SOFT-FLOAT-64-FMA-NEXT: movq %r12, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, 24(%rbx)
; SOFT-FLOAT-64-FMA-NEXT: movq %r14, 16(%rbx)
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, 8(%rbx)
-; SOFT-FLOAT-64-FMA-NEXT: movq %r12, (%rbx)
+; SOFT-FLOAT-64-FMA-NEXT: movq %r13, (%rbx)
; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rax
; SOFT-FLOAT-64-FMA-NEXT: addq $8, %rsp
; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 56
@@ -1711,30 +1711,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r14, -32
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r15, -24
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbp, -16
+; SOFT-FLOAT-64-FMA4-NEXT: movq %r9, %rbp
; SOFT-FLOAT-64-FMA4-NEXT: movq %rcx, %r14
; SOFT-FLOAT-64-FMA4-NEXT: movq %rdx, %r15
-; SOFT-FLOAT-64-FMA4-NEXT: movq %rsi, %r12
+; SOFT-FLOAT-64-FMA4-NEXT: movq %rsi, %r13
; SOFT-FLOAT-64-FMA4-NEXT: movq %rdi, %rbx
-; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: movq %r8, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
-; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r13
+; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r12
; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, %rdi
-; SOFT-FLOAT-64-FMA4-NEXT: movq %rbp, %rsi
+; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r15
-; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, %rdi
-; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, %rdi
+; SOFT-FLOAT-64-FMA4-NEXT: movq %rbp, %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
-; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r12
+; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r13
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
@@ -1743,13 +1743,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14
-; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, %rdi
+; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, 24(%rbx)
; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, 16(%rbx)
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, 8(%rbx)
-; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, (%rbx)
+; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, (%rbx)
; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rax
; SOFT-FLOAT-64-FMA4-NEXT: addq $8, %rsp
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56
diff --git a/llvm/test/CodeGen/X86/i128-abi.ll b/llvm/test/CodeGen/X86/i128-abi.ll
index 23eb6ec0322abd..264c546b4cae22 100644
--- a/llvm/test/CodeGen/X86/i128-abi.ll
+++ b/llvm/test/CodeGen/X86/i128-abi.ll
@@ -31,7 +31,7 @@ define i128 @on_stack2(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i1
define i64 @trailing_arg_on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i64 %a6) {
; CHECK-LABEL: trailing_arg_on_stack:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq 24(%rsp), %rax
+; CHECK-NEXT: movq %r9, %rax
; CHECK-NEXT: retq
ret i64 %a6
}
@@ -78,20 +78,18 @@ define void @call_trailing_arg_on_stack(i128 %x, i64 %y) nounwind {
; CHECK-LABEL: call_trailing_arg_on_stack:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: movq %rdx, %rax
-; CHECK-NEXT: movq %rsi, %r9
+; CHECK-NEXT: movq %rdx, %r9
+; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: movq %rdi, %r10
-; CHECK-NEXT: subq $8, %rsp
; CHECK-NEXT: movl $1, %esi
; CHECK-NEXT: movl $2, %edx
; CHECK-NEXT: movl $3, %ecx
; CHECK-NEXT: movl $4, %r8d
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: pushq %r9
; CHECK-NEXT: pushq %r10
; CHECK-NEXT: callq trailing_arg_on_stack@PLT
-; CHECK-NEXT: addq $32, %rsp
+; CHECK-NEXT: addq $16, %rsp
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
call i128 @trailing_arg_on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x, i64 %y)
diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
index 322acd76e12e63..bd563f97b0ac4b 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -1795,27 +1795,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; SSE-NEXT: seto %dil
-; SSE-NEXT: movq %r8, %r9
-; SSE-NEXT: sarq $63, %r9
+; SSE-NEXT: movq %r8, %r10
+; SSE-NEXT: sarq $63, %r10
; SSE-NEXT: testb %dil, %dil
-; SSE-NEXT: cmovneq %r9, %rcx
-; SSE-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
-; SSE-NEXT: xorq %r10, %r9
+; SSE-NEXT: cmovneq %r10, %rcx
+; SSE-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
+; SSE-NEXT: xorq %r11, %r10
; SSE-NEXT: testb %dil, %dil
-; SSE-NEXT: cmoveq %r8, %r9
-; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rsi
+; SSE-NEXT: cmoveq %r8, %r10
+; SSE-NEXT: addq %r9, %rsi
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT: seto %dil
; SSE-NEXT: movq %rdx, %r8
; SSE-NEXT: sarq $63, %r8
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmovneq %r8, %rsi
-; SSE-NEXT: xorq %r10, %r8
+; SSE-NEXT: xorq %r11, %r8
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmoveq %rdx, %r8
; SSE-NEXT: movq %rcx, 16(%rax)
; SSE-NEXT: movq %rsi, (%rax)
-; SSE-NEXT: movq %r9, 24(%rax)
+; SSE-NEXT: movq %r10, 24(%rax)
; SSE-NEXT: movq %r8, 8(%rax)
; SSE-NEXT: retq
;
@@ -1825,27 +1825,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; AVX-NEXT: seto %dil
-; AVX-NEXT: movq %r8, %r9
-; AVX-NEXT: sarq $63, %r9
+; AVX-NEXT: movq %r8, %r10
+; AVX-NEXT: sarq $63, %r10
; AVX-NEXT: testb %dil, %dil
-; AVX-NEXT: cmovneq %r9, %rcx
-; AVX-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
-; AVX-NEXT: xorq %r10, %r9
+; AVX-NEXT: cmovneq %r10, %rcx
+; AVX-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
+; AVX-NEXT: xorq %r11, %r10
; AVX-NEXT: testb %dil, %dil
-; AVX-NEXT: cmoveq %r8, %r9
-; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rsi
+; AVX-NEXT: cmoveq %r8, %r10
+; AVX-NEXT: addq %r9, %rsi
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: seto %dil
; AVX-NEXT: movq %rdx, %r8
; AVX-NEXT: sarq $63, %r8
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmovneq %r8, %rsi
-; AVX-NEXT: xorq %r10, %r8
+; AVX-NEXT: xorq %r11, %r8
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmoveq %rdx, %r8
; AVX-NEXT: movq %rcx, 16(%rax)
; AVX-NEXT: movq %rsi, (%rax)
-; AVX-NEXT: movq %r9, 24(%rax)
+; AVX-NEXT: movq %r10, 24(%rax)
; AVX-NEXT: movq %r8, 8(%rax)
; AVX-NEXT: retq
%z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
index ac8b561abf0033..88df3c175ec9cc 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
@@ -2026,27 +2026,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
; SSE-NEXT: seto %dil
-; SSE-NEXT: movq %r8, %r9
-; SSE-NEXT: sarq $63, %r9
+; SSE-NEXT: movq %r8, %r10
+; SSE-NEXT: sarq $63, %r10
; SSE-NEXT: testb %dil, %dil
-; SSE-NEXT: cmovneq %r9, %rcx
-; SSE-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
-; SSE-NEXT: xorq %r10, %r9
+; SSE-NEXT: cmovneq %r10, %rcx
+; SSE-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
+; SSE-NEXT: xorq %r11, %r10
; SSE-NEXT: testb %dil, %dil
-; SSE-NEXT: cmoveq %r8, %r9
-; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rsi
+; SSE-NEXT: cmoveq %r8, %r10
+; SSE-NEXT: subq %r9, %rsi
; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT: seto %dil
; SSE-NEXT: movq %rdx, %r8
; SSE-NEXT: sarq $63, %r8
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmovneq %r8, %rsi
-; SSE-NEXT: xorq %r10, %r8
+; SSE-NEXT: xorq %r11, %r8
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmoveq %rdx, %r8
; SSE-NEXT: movq %rcx, 16(%rax)
; SSE-NEXT: movq %rsi, (%rax)
-; SSE-NEXT: movq %r9, 24(%rax)
+; SSE-NEXT: movq %r10, 24(%rax)
; SSE-NEXT: movq %r8, 8(%rax)
; SSE-NEXT: retq
;
@@ -2056,27 +2056,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
; AVX-NEXT: seto %dil
-; AVX-NEXT: movq %r8, %r9
-; AVX-NEXT: sarq $63, %r9
+; AVX-NEXT: movq %r8, %r10
+; AVX-NEXT: sarq $63, %r10
; AVX-NEXT: testb %dil, %dil
-; AVX-NEXT: cmovneq %r9, %rcx
-; AVX-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
-; AVX-NEXT: xorq %r10, %r9
+; AVX-NEXT: cmovneq %r10, %rcx
+; AVX-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
+; AVX-NEXT: xorq %r11, %r10
; AVX-NEXT: testb %dil, %dil
-; AVX-NEXT: cmoveq %r8, %r9
-; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rsi
+; AVX-NEXT: cmoveq %r8, %r10
+; AVX-NEXT: subq %r9, %rsi
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: seto %dil
; AVX-NEXT: movq %rdx, %r8
; AVX-NEXT: sarq $63, %r8
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmovneq %r8, %rsi
-; AVX-NEXT: xorq %r10, %r8
+; AVX-NEXT: xorq %r11, %r8
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmoveq %rdx, %r8
; AVX-NEXT: movq %rcx, 16(%rax)
; AVX-NEXT: movq %rsi, (%rax)
-; AVX-NEXT: movq %r9, 24(%rax)
+; AVX-NEXT: movq %r10, 24(%rax)
; AVX-NEXT: movq %r8, 8(%rax)
; AVX-NEXT: retq
%z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll
index 1e9db9f55a8d5c..9538ea1061cd18 100644
--- a/llvm/test/CodeGen/X86/subcarry.ll
+++ b/llvm/test/CodeGen/X86/subcarry.ll
@@ -21,7 +21,7 @@ define i256 @sub256(i256 %a, i256 %b) nounwind {
; CHECK-LABEL: sub256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: subq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: subq %r9, %rsi
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
index 1ff95c876a6b17..d744ce6ed6af0b 100644
--- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
@@ -1161,11 +1161,11 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE-LABEL: v2i128:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
-; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rsi
+; SSE-NEXT: addq %r9, %rsi
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT: movq $-1, %rdi
-; SSE-NEXT: cmovbq %rdi, %rdx
; SSE-NEXT: cmovbq %rdi, %rsi
+; SSE-NEXT: cmovbq %rdi, %rdx
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; SSE-NEXT: cmovbq %rdi, %r8
@@ -1179,11 +1179,11 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; AVX-LABEL: v2i128:
; AVX: # %bb.0:
; AVX-NEXT: movq %rdi, %rax
-; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rsi
+; AVX-NEXT: addq %r9, %rsi
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: movq $-1, %rdi
-; AVX-NEXT: cmovbq %rdi, %rdx
; AVX-NEXT: cmovbq %rdi, %rsi
+; AVX-NEXT: cmovbq %rdi, %rdx
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; AVX-NEXT: cmovbq %rdi, %r8
diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll
index 34eb30dfebeeb2..4e17ca6fbae33b 100644
--- a/llvm/test/CodeGen/X86/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll
@@ -1057,10 +1057,10 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: xorl %edi, %edi
-; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rsi
+; SSE-NEXT: subq %r9, %rsi
; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
-; SSE-NEXT: cmovbq %rdi, %rdx
; SSE-NEXT: cmovbq %rdi, %rsi
+; SSE-NEXT: cmovbq %rdi, %rdx
; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
; SSE-NEXT: cmovbq %rdi, %r8
@@ -1075,10 +1075,10 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; AVX: # %bb.0:
; AVX-NEXT: movq %rdi, %rax
; AVX-NEXT: xorl %edi, %edi
-; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rsi
+; AVX-NEXT: subq %r9, %rsi
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
-; AVX-NEXT: cmovbq %rdi, %rdx
; AVX-NEXT: cmovbq %rdi, %rsi
+; AVX-NEXT: cmovbq %rdi, %rdx
; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
; AVX-NEXT: cmovbq %rdi, %r8