[llvm] [X86] Fix ABI for passing after i128 (PR #124134)

Nikita Popov via llvm-commits llvm-commits@lists.llvm.org
Fri Jan 24 02:14:07 PST 2025


https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/124134

From fe8a43b1cefad72b154a68c1ca666b37f4d7e238 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 23 Jan 2025 15:14:15 +0100
Subject: [PATCH 1/4] [X86] Fix ABI for passing after i128

If we're passing an i128 value and we no longer have enough argument
registers (only r9 unallocated), the value gets passed via the stack.
However, r9 is still allocated as a shadow register, which means that
a following i64 argument will not use it. This doesn't match the
x86-64 psABI.

Fix this by marking i128 arguments as requiring consecutive registers,
and adding a custom CC lowering that allocates both parts of the i128
at the same time, either to registers or to the stack, without
reserving a shadow register.

Fixes https://github.com/llvm/llvm-project/issues/123935.
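
To make this concrete, consider a sketch of the problem case (an
illustration mirroring the trailing_arg_on_stack test updated below;
__int128 is the Clang/GCC extension that lowers to i128):

    // a0..a4 occupy rdi, rsi, rdx, rcx and r8. a5 no longer fits into
    // the single remaining register, so it is passed entirely on the
    // stack -- and per the x86-64 psABI, a6 must then still go in r9.
    long f(long a0, long a1, long a2, long a3, long a4,
           __int128 a5, long a6) {
      return a6; // with this fix: movq %r9, %rax instead of a stack load
    }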
---
 llvm/lib/Target/X86/X86CallingConv.cpp        | 36 +++++++++++++
 llvm/lib/Target/X86/X86CallingConv.td         |  6 +--
 llvm/lib/Target/X86/X86ISelLowering.h         |  4 ++
 llvm/lib/Target/X86/X86ISelLoweringCall.cpp   |  8 +++
 llvm/test/CodeGen/X86/addcarry.ll             |  2 +-
 .../CodeGen/X86/apx/flags-copy-lowering.ll    |  8 +--
 llvm/test/CodeGen/X86/avgflooru-i128.ll       |  2 +-
 llvm/test/CodeGen/X86/fmuladd-soft-float.ll   | 54 +++++++++----------
 llvm/test/CodeGen/X86/i128-abi.ll             | 10 ++--
 llvm/test/CodeGen/X86/sadd_sat_vec.ll         | 36 ++++++-------
 llvm/test/CodeGen/X86/ssub_sat_vec.ll         | 36 ++++++-------
 llvm/test/CodeGen/X86/subcarry.ll             |  2 +-
 llvm/test/CodeGen/X86/uadd_sat_vec.ll         |  8 +--
 llvm/test/CodeGen/X86/usub_sat_vec.ll         |  8 +--
 14 files changed, 132 insertions(+), 88 deletions(-)

diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp
index 7359ef341dde58..85718690b91197 100644
--- a/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -340,5 +340,41 @@ static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   return false;
 }
 
+/// Special handling for i128: Either allocate the value to two consecutive
+/// i64 registers, or to the stack. Do not partially allocate in registers,
+/// and do not reserve any registers when allocating to the stack.
+static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                           CCValAssign::LocInfo &LocInfo,
+                           ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  assert(ValVT == MVT::i64 && "Should have i64 parts");
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+  PendingMembers.push_back(
+      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+  if (!ArgFlags.isInConsecutiveRegsLast())
+    return true;
+
+  unsigned NumRegs = PendingMembers.size();
+  assert(NumRegs == 2 && "Should have two parts");
+
+  MCPhysReg Regs[] = {X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9};
+  ArrayRef<MCPhysReg> Allocated = State.AllocateRegBlock(Regs, NumRegs);
+  if (!Allocated.empty()) {
+    for (const auto &[Pending, Reg] : zip(PendingMembers, Allocated)) {
+      Pending.convertToReg(Reg);
+      State.addLoc(Pending);
+    }
+  } else {
+    int64_t Offset = State.AllocateStack(8, Align(16));
+    for (auto &Pending : PendingMembers) {
+      Pending.convertToMem(Offset);
+      State.addLoc(Pending);
+      Offset += 8;
+    }
+  }
+  PendingMembers.clear();
+  return true;
+}
+
 // Provides entry points of CC_X86 and RetCC_X86.
 #include "X86GenCallingConv.inc"
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 91af111db8cda5..72b103b0bb0c50 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -548,11 +548,9 @@ def CC_X86_64_C : CallingConv<[
   CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
 
   // i128 can be either passed in two i64 registers, or on the stack, but
-  // not split across register and stack. As such, do not allow using R9
-  // for a split i64.
+  // not split across register and stack. Handle this with a custom function.
   CCIfType<[i64],
-           CCIfSplit<CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
-  CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [R9]>>>,
+           CCIfConsecutiveRegs<CCCustom<"CC_X86_64_I128">>>,
 
   CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index e07bcd989c5188..fe79fefeed631c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1604,6 +1604,10 @@ namespace llvm {
         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
         unsigned &NumIntermediates, MVT &RegisterVT) const override;
 
+    bool functionArgumentNeedsConsecutiveRegisters(
+        Type *Ty, CallingConv::ID CallConv, bool isVarArg,
+        const DataLayout &DL) const override;
+
     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
 
     bool supportSwiftError() const override;
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 4a4fd246cb7cdf..6835c7e336a5cb 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -233,6 +233,14 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
   return VT.changeVectorElementTypeToInteger();
 }
 
+bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
+    Type *Ty, CallingConv::ID CallConv, bool isVarArg,
+    const DataLayout &DL) const {
+  // i128 split into i64 needs to be allocated to two consecutive registers,
+  // or spilled to the stack as a whole.
+  return Ty->isIntegerTy(128);
+}
+
 /// Helper for getByValTypeAlignment to determine
 /// the desired ByVal argument alignment.
 static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
index f8d32fc2d29252..97894db1188e28 100644
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -49,7 +49,7 @@ define i256 @add256(i256 %a, i256 %b) nounwind {
 ; CHECK-LABEL: add256:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    addq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT:    addq %r9, %rsi
 ; CHECK-NEXT:    adcq {{[0-9]+}}(%rsp), %rdx
 ; CHECK-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    adcq {{[0-9]+}}(%rsp), %r8
diff --git a/llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll b/llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll
index deca130a04ff04..bd764c2edef29d 100644
--- a/llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll
+++ b/llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll
@@ -31,15 +31,15 @@ define <2 x i128> @flag_copy_2(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; CHECK-NEXT:    movq %r8, %rdi
 ; CHECK-NEXT:    {nf} sarq $63, %rdi
 ; CHECK-NEXT:    cmovoq %rdi, %rcx
-; CHECK-NEXT:    movabsq $-9223372036854775808, %r9 # imm = 0x8000000000000000
-; CHECK-NEXT:    {nf} xorq %r9, %rdi
+; CHECK-NEXT:    movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
+; CHECK-NEXT:    {nf} xorq %r10, %rdi
 ; CHECK-NEXT:    cmovnoq %r8, %rdi
-; CHECK-NEXT:    subq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT:    subq %r9, %rsi
 ; CHECK-NEXT:    sbbq {{[0-9]+}}(%rsp), %rdx
 ; CHECK-NEXT:    movq %rdx, %r8
 ; CHECK-NEXT:    {nf} sarq $63, %r8
 ; CHECK-NEXT:    cmovoq %r8, %rsi
-; CHECK-NEXT:    {nf} xorq %r9, %r8
+; CHECK-NEXT:    {nf} xorq %r10, %r8
 ; CHECK-NEXT:    cmovnoq %rdx, %r8
 ; CHECK-NEXT:    movq %rcx, 16(%rax)
 ; CHECK-NEXT:    movq %rsi, (%rax)
diff --git a/llvm/test/CodeGen/X86/avgflooru-i128.ll b/llvm/test/CodeGen/X86/avgflooru-i128.ll
index da16a7da48ca69..11e886e25ba4ea 100644
--- a/llvm/test/CodeGen/X86/avgflooru-i128.ll
+++ b/llvm/test/CodeGen/X86/avgflooru-i128.ll
@@ -119,7 +119,7 @@ define <2 x i128> @avgflooru_i128_vec(<2 x i128> %x, <2 x i128> %y) {
 ; CHECK-LABEL: avgflooru_i128_vec:
 ; CHECK:       # %bb.0: # %start
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    addq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT:    addq %r9, %rsi
 ; CHECK-NEXT:    adcq {{[0-9]+}}(%rsp), %rdx
 ; CHECK-NEXT:    setb %dil
 ; CHECK-NEXT:    movzbl %dil, %edi
diff --git a/llvm/test/CodeGen/X86/fmuladd-soft-float.ll b/llvm/test/CodeGen/X86/fmuladd-soft-float.ll
index ccb2f37590b0ad..cbdfa32ed46274 100644
--- a/llvm/test/CodeGen/X86/fmuladd-soft-float.ll
+++ b/llvm/test/CodeGen/X86/fmuladd-soft-float.ll
@@ -1555,30 +1555,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
 ; SOFT-FLOAT-64-NEXT:    .cfi_offset %r14, -32
 ; SOFT-FLOAT-64-NEXT:    .cfi_offset %r15, -24
 ; SOFT-FLOAT-64-NEXT:    .cfi_offset %rbp, -16
+; SOFT-FLOAT-64-NEXT:    movq %r9, %rbp
 ; SOFT-FLOAT-64-NEXT:    movq %rcx, %r14
 ; SOFT-FLOAT-64-NEXT:    movq %rdx, %r15
-; SOFT-FLOAT-64-NEXT:    movq %rsi, %r12
+; SOFT-FLOAT-64-NEXT:    movq %rsi, %r13
 ; SOFT-FLOAT-64-NEXT:    movq %rdi, %rbx
-; SOFT-FLOAT-64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
 ; SOFT-FLOAT-64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-NEXT:    movq %r8, %rdi
 ; SOFT-FLOAT-64-NEXT:    callq __muldf3@PLT
-; SOFT-FLOAT-64-NEXT:    movq %rax, %r13
+; SOFT-FLOAT-64-NEXT:    movq %rax, %r12
 ; SOFT-FLOAT-64-NEXT:    movq %r14, %rdi
-; SOFT-FLOAT-64-NEXT:    movq %rbp, %rsi
+; SOFT-FLOAT-64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-NEXT:    callq __muldf3@PLT
 ; SOFT-FLOAT-64-NEXT:    movq %rax, %r14
 ; SOFT-FLOAT-64-NEXT:    movq %r15, %rdi
 ; SOFT-FLOAT-64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-NEXT:    callq __muldf3@PLT
 ; SOFT-FLOAT-64-NEXT:    movq %rax, %r15
-; SOFT-FLOAT-64-NEXT:    movq %r12, %rdi
-; SOFT-FLOAT-64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
+; SOFT-FLOAT-64-NEXT:    movq %r13, %rdi
+; SOFT-FLOAT-64-NEXT:    movq %rbp, %rsi
 ; SOFT-FLOAT-64-NEXT:    callq __muldf3@PLT
 ; SOFT-FLOAT-64-NEXT:    movq %rax, %rdi
 ; SOFT-FLOAT-64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-NEXT:    callq __adddf3@PLT
-; SOFT-FLOAT-64-NEXT:    movq %rax, %r12
+; SOFT-FLOAT-64-NEXT:    movq %rax, %r13
 ; SOFT-FLOAT-64-NEXT:    movq %r15, %rdi
 ; SOFT-FLOAT-64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-NEXT:    callq __adddf3@PLT
@@ -1587,13 +1587,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
 ; SOFT-FLOAT-64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-NEXT:    callq __adddf3@PLT
 ; SOFT-FLOAT-64-NEXT:    movq %rax, %r14
-; SOFT-FLOAT-64-NEXT:    movq %r13, %rdi
+; SOFT-FLOAT-64-NEXT:    movq %r12, %rdi
 ; SOFT-FLOAT-64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-NEXT:    callq __adddf3@PLT
 ; SOFT-FLOAT-64-NEXT:    movq %rax, 24(%rbx)
 ; SOFT-FLOAT-64-NEXT:    movq %r14, 16(%rbx)
 ; SOFT-FLOAT-64-NEXT:    movq %r15, 8(%rbx)
-; SOFT-FLOAT-64-NEXT:    movq %r12, (%rbx)
+; SOFT-FLOAT-64-NEXT:    movq %r13, (%rbx)
 ; SOFT-FLOAT-64-NEXT:    movq %rbx, %rax
 ; SOFT-FLOAT-64-NEXT:    addq $8, %rsp
 ; SOFT-FLOAT-64-NEXT:    .cfi_def_cfa_offset 56
@@ -1633,30 +1633,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
 ; SOFT-FLOAT-64-FMA-NEXT:    .cfi_offset %r14, -32
 ; SOFT-FLOAT-64-FMA-NEXT:    .cfi_offset %r15, -24
 ; SOFT-FLOAT-64-FMA-NEXT:    .cfi_offset %rbp, -16
+; SOFT-FLOAT-64-FMA-NEXT:    movq %r9, %rbp
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %rcx, %r14
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %rdx, %r15
-; SOFT-FLOAT-64-FMA-NEXT:    movq %rsi, %r12
+; SOFT-FLOAT-64-FMA-NEXT:    movq %rsi, %r13
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %rdi, %rbx
-; SOFT-FLOAT-64-FMA-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
 ; SOFT-FLOAT-64-FMA-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %r8, %rdi
 ; SOFT-FLOAT-64-FMA-NEXT:    callq __muldf3@PLT
-; SOFT-FLOAT-64-FMA-NEXT:    movq %rax, %r13
+; SOFT-FLOAT-64-FMA-NEXT:    movq %rax, %r12
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %r14, %rdi
-; SOFT-FLOAT-64-FMA-NEXT:    movq %rbp, %rsi
+; SOFT-FLOAT-64-FMA-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA-NEXT:    callq __muldf3@PLT
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %rax, %r14
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %r15, %rdi
 ; SOFT-FLOAT-64-FMA-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA-NEXT:    callq __muldf3@PLT
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %rax, %r15
-; SOFT-FLOAT-64-FMA-NEXT:    movq %r12, %rdi
-; SOFT-FLOAT-64-FMA-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
+; SOFT-FLOAT-64-FMA-NEXT:    movq %r13, %rdi
+; SOFT-FLOAT-64-FMA-NEXT:    movq %rbp, %rsi
 ; SOFT-FLOAT-64-FMA-NEXT:    callq __muldf3@PLT
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %rax, %rdi
 ; SOFT-FLOAT-64-FMA-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA-NEXT:    callq __adddf3@PLT
-; SOFT-FLOAT-64-FMA-NEXT:    movq %rax, %r12
+; SOFT-FLOAT-64-FMA-NEXT:    movq %rax, %r13
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %r15, %rdi
 ; SOFT-FLOAT-64-FMA-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA-NEXT:    callq __adddf3@PLT
@@ -1665,13 +1665,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
 ; SOFT-FLOAT-64-FMA-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA-NEXT:    callq __adddf3@PLT
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %rax, %r14
-; SOFT-FLOAT-64-FMA-NEXT:    movq %r13, %rdi
+; SOFT-FLOAT-64-FMA-NEXT:    movq %r12, %rdi
 ; SOFT-FLOAT-64-FMA-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA-NEXT:    callq __adddf3@PLT
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %rax, 24(%rbx)
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %r14, 16(%rbx)
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %r15, 8(%rbx)
-; SOFT-FLOAT-64-FMA-NEXT:    movq %r12, (%rbx)
+; SOFT-FLOAT-64-FMA-NEXT:    movq %r13, (%rbx)
 ; SOFT-FLOAT-64-FMA-NEXT:    movq %rbx, %rax
 ; SOFT-FLOAT-64-FMA-NEXT:    addq $8, %rsp
 ; SOFT-FLOAT-64-FMA-NEXT:    .cfi_def_cfa_offset 56
@@ -1711,30 +1711,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
 ; SOFT-FLOAT-64-FMA4-NEXT:    .cfi_offset %r14, -32
 ; SOFT-FLOAT-64-FMA4-NEXT:    .cfi_offset %r15, -24
 ; SOFT-FLOAT-64-FMA4-NEXT:    .cfi_offset %rbp, -16
+; SOFT-FLOAT-64-FMA4-NEXT:    movq %r9, %rbp
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %rcx, %r14
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %rdx, %r15
-; SOFT-FLOAT-64-FMA4-NEXT:    movq %rsi, %r12
+; SOFT-FLOAT-64-FMA4-NEXT:    movq %rsi, %r13
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %rdi, %rbx
-; SOFT-FLOAT-64-FMA4-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %r8, %rdi
 ; SOFT-FLOAT-64-FMA4-NEXT:    callq __muldf3@PLT
-; SOFT-FLOAT-64-FMA4-NEXT:    movq %rax, %r13
+; SOFT-FLOAT-64-FMA4-NEXT:    movq %rax, %r12
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %r14, %rdi
-; SOFT-FLOAT-64-FMA4-NEXT:    movq %rbp, %rsi
+; SOFT-FLOAT-64-FMA4-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA4-NEXT:    callq __muldf3@PLT
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %rax, %r14
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %r15, %rdi
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA4-NEXT:    callq __muldf3@PLT
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %rax, %r15
-; SOFT-FLOAT-64-FMA4-NEXT:    movq %r12, %rdi
-; SOFT-FLOAT-64-FMA4-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
+; SOFT-FLOAT-64-FMA4-NEXT:    movq %r13, %rdi
+; SOFT-FLOAT-64-FMA4-NEXT:    movq %rbp, %rsi
 ; SOFT-FLOAT-64-FMA4-NEXT:    callq __muldf3@PLT
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %rax, %rdi
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA4-NEXT:    callq __adddf3@PLT
-; SOFT-FLOAT-64-FMA4-NEXT:    movq %rax, %r12
+; SOFT-FLOAT-64-FMA4-NEXT:    movq %rax, %r13
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %r15, %rdi
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA4-NEXT:    callq __adddf3@PLT
@@ -1743,13 +1743,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA4-NEXT:    callq __adddf3@PLT
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %rax, %r14
-; SOFT-FLOAT-64-FMA4-NEXT:    movq %r13, %rdi
+; SOFT-FLOAT-64-FMA4-NEXT:    movq %r12, %rdi
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
 ; SOFT-FLOAT-64-FMA4-NEXT:    callq __adddf3@PLT
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %rax, 24(%rbx)
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %r14, 16(%rbx)
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %r15, 8(%rbx)
-; SOFT-FLOAT-64-FMA4-NEXT:    movq %r12, (%rbx)
+; SOFT-FLOAT-64-FMA4-NEXT:    movq %r13, (%rbx)
 ; SOFT-FLOAT-64-FMA4-NEXT:    movq %rbx, %rax
 ; SOFT-FLOAT-64-FMA4-NEXT:    addq $8, %rsp
 ; SOFT-FLOAT-64-FMA4-NEXT:    .cfi_def_cfa_offset 56
diff --git a/llvm/test/CodeGen/X86/i128-abi.ll b/llvm/test/CodeGen/X86/i128-abi.ll
index 23eb6ec0322abd..264c546b4cae22 100644
--- a/llvm/test/CodeGen/X86/i128-abi.ll
+++ b/llvm/test/CodeGen/X86/i128-abi.ll
@@ -31,7 +31,7 @@ define i128 @on_stack2(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i1
 define i64 @trailing_arg_on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i64 %a6) {
 ; CHECK-LABEL: trailing_arg_on_stack:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq 24(%rsp), %rax
+; CHECK-NEXT:    movq %r9, %rax
 ; CHECK-NEXT:    retq
   ret i64 %a6
 }
@@ -78,20 +78,18 @@ define void @call_trailing_arg_on_stack(i128 %x, i64 %y) nounwind {
 ; CHECK-LABEL: call_trailing_arg_on_stack:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    movq %rdx, %rax
-; CHECK-NEXT:    movq %rsi, %r9
+; CHECK-NEXT:    movq %rdx, %r9
+; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    movq %rdi, %r10
-; CHECK-NEXT:    subq $8, %rsp
 ; CHECK-NEXT:    movl $1, %esi
 ; CHECK-NEXT:    movl $2, %edx
 ; CHECK-NEXT:    movl $3, %ecx
 ; CHECK-NEXT:    movl $4, %r8d
 ; CHECK-NEXT:    xorl %edi, %edi
 ; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    pushq %r9
 ; CHECK-NEXT:    pushq %r10
 ; CHECK-NEXT:    callq trailing_arg_on_stack@PLT
-; CHECK-NEXT:    addq $32, %rsp
+; CHECK-NEXT:    addq $16, %rsp
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    retq
   call i128 @trailing_arg_on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x, i64 %y)
diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
index 322acd76e12e63..bd563f97b0ac4b 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -1795,27 +1795,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; SSE-NEXT:    addq {{[0-9]+}}(%rsp), %rcx
 ; SSE-NEXT:    adcq {{[0-9]+}}(%rsp), %r8
 ; SSE-NEXT:    seto %dil
-; SSE-NEXT:    movq %r8, %r9
-; SSE-NEXT:    sarq $63, %r9
+; SSE-NEXT:    movq %r8, %r10
+; SSE-NEXT:    sarq $63, %r10
 ; SSE-NEXT:    testb %dil, %dil
-; SSE-NEXT:    cmovneq %r9, %rcx
-; SSE-NEXT:    movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
-; SSE-NEXT:    xorq %r10, %r9
+; SSE-NEXT:    cmovneq %r10, %rcx
+; SSE-NEXT:    movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
+; SSE-NEXT:    xorq %r11, %r10
 ; SSE-NEXT:    testb %dil, %dil
-; SSE-NEXT:    cmoveq %r8, %r9
-; SSE-NEXT:    addq {{[0-9]+}}(%rsp), %rsi
+; SSE-NEXT:    cmoveq %r8, %r10
+; SSE-NEXT:    addq %r9, %rsi
 ; SSE-NEXT:    adcq {{[0-9]+}}(%rsp), %rdx
 ; SSE-NEXT:    seto %dil
 ; SSE-NEXT:    movq %rdx, %r8
 ; SSE-NEXT:    sarq $63, %r8
 ; SSE-NEXT:    testb %dil, %dil
 ; SSE-NEXT:    cmovneq %r8, %rsi
-; SSE-NEXT:    xorq %r10, %r8
+; SSE-NEXT:    xorq %r11, %r8
 ; SSE-NEXT:    testb %dil, %dil
 ; SSE-NEXT:    cmoveq %rdx, %r8
 ; SSE-NEXT:    movq %rcx, 16(%rax)
 ; SSE-NEXT:    movq %rsi, (%rax)
-; SSE-NEXT:    movq %r9, 24(%rax)
+; SSE-NEXT:    movq %r10, 24(%rax)
 ; SSE-NEXT:    movq %r8, 8(%rax)
 ; SSE-NEXT:    retq
 ;
@@ -1825,27 +1825,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; AVX-NEXT:    addq {{[0-9]+}}(%rsp), %rcx
 ; AVX-NEXT:    adcq {{[0-9]+}}(%rsp), %r8
 ; AVX-NEXT:    seto %dil
-; AVX-NEXT:    movq %r8, %r9
-; AVX-NEXT:    sarq $63, %r9
+; AVX-NEXT:    movq %r8, %r10
+; AVX-NEXT:    sarq $63, %r10
 ; AVX-NEXT:    testb %dil, %dil
-; AVX-NEXT:    cmovneq %r9, %rcx
-; AVX-NEXT:    movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
-; AVX-NEXT:    xorq %r10, %r9
+; AVX-NEXT:    cmovneq %r10, %rcx
+; AVX-NEXT:    movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
+; AVX-NEXT:    xorq %r11, %r10
 ; AVX-NEXT:    testb %dil, %dil
-; AVX-NEXT:    cmoveq %r8, %r9
-; AVX-NEXT:    addq {{[0-9]+}}(%rsp), %rsi
+; AVX-NEXT:    cmoveq %r8, %r10
+; AVX-NEXT:    addq %r9, %rsi
 ; AVX-NEXT:    adcq {{[0-9]+}}(%rsp), %rdx
 ; AVX-NEXT:    seto %dil
 ; AVX-NEXT:    movq %rdx, %r8
 ; AVX-NEXT:    sarq $63, %r8
 ; AVX-NEXT:    testb %dil, %dil
 ; AVX-NEXT:    cmovneq %r8, %rsi
-; AVX-NEXT:    xorq %r10, %r8
+; AVX-NEXT:    xorq %r11, %r8
 ; AVX-NEXT:    testb %dil, %dil
 ; AVX-NEXT:    cmoveq %rdx, %r8
 ; AVX-NEXT:    movq %rcx, 16(%rax)
 ; AVX-NEXT:    movq %rsi, (%rax)
-; AVX-NEXT:    movq %r9, 24(%rax)
+; AVX-NEXT:    movq %r10, 24(%rax)
 ; AVX-NEXT:    movq %r8, 8(%rax)
 ; AVX-NEXT:    retq
   %z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
index ac8b561abf0033..88df3c175ec9cc 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
@@ -2026,27 +2026,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; SSE-NEXT:    subq {{[0-9]+}}(%rsp), %rcx
 ; SSE-NEXT:    sbbq {{[0-9]+}}(%rsp), %r8
 ; SSE-NEXT:    seto %dil
-; SSE-NEXT:    movq %r8, %r9
-; SSE-NEXT:    sarq $63, %r9
+; SSE-NEXT:    movq %r8, %r10
+; SSE-NEXT:    sarq $63, %r10
 ; SSE-NEXT:    testb %dil, %dil
-; SSE-NEXT:    cmovneq %r9, %rcx
-; SSE-NEXT:    movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
-; SSE-NEXT:    xorq %r10, %r9
+; SSE-NEXT:    cmovneq %r10, %rcx
+; SSE-NEXT:    movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
+; SSE-NEXT:    xorq %r11, %r10
 ; SSE-NEXT:    testb %dil, %dil
-; SSE-NEXT:    cmoveq %r8, %r9
-; SSE-NEXT:    subq {{[0-9]+}}(%rsp), %rsi
+; SSE-NEXT:    cmoveq %r8, %r10
+; SSE-NEXT:    subq %r9, %rsi
 ; SSE-NEXT:    sbbq {{[0-9]+}}(%rsp), %rdx
 ; SSE-NEXT:    seto %dil
 ; SSE-NEXT:    movq %rdx, %r8
 ; SSE-NEXT:    sarq $63, %r8
 ; SSE-NEXT:    testb %dil, %dil
 ; SSE-NEXT:    cmovneq %r8, %rsi
-; SSE-NEXT:    xorq %r10, %r8
+; SSE-NEXT:    xorq %r11, %r8
 ; SSE-NEXT:    testb %dil, %dil
 ; SSE-NEXT:    cmoveq %rdx, %r8
 ; SSE-NEXT:    movq %rcx, 16(%rax)
 ; SSE-NEXT:    movq %rsi, (%rax)
-; SSE-NEXT:    movq %r9, 24(%rax)
+; SSE-NEXT:    movq %r10, 24(%rax)
 ; SSE-NEXT:    movq %r8, 8(%rax)
 ; SSE-NEXT:    retq
 ;
@@ -2056,27 +2056,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; AVX-NEXT:    subq {{[0-9]+}}(%rsp), %rcx
 ; AVX-NEXT:    sbbq {{[0-9]+}}(%rsp), %r8
 ; AVX-NEXT:    seto %dil
-; AVX-NEXT:    movq %r8, %r9
-; AVX-NEXT:    sarq $63, %r9
+; AVX-NEXT:    movq %r8, %r10
+; AVX-NEXT:    sarq $63, %r10
 ; AVX-NEXT:    testb %dil, %dil
-; AVX-NEXT:    cmovneq %r9, %rcx
-; AVX-NEXT:    movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
-; AVX-NEXT:    xorq %r10, %r9
+; AVX-NEXT:    cmovneq %r10, %rcx
+; AVX-NEXT:    movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
+; AVX-NEXT:    xorq %r11, %r10
 ; AVX-NEXT:    testb %dil, %dil
-; AVX-NEXT:    cmoveq %r8, %r9
-; AVX-NEXT:    subq {{[0-9]+}}(%rsp), %rsi
+; AVX-NEXT:    cmoveq %r8, %r10
+; AVX-NEXT:    subq %r9, %rsi
 ; AVX-NEXT:    sbbq {{[0-9]+}}(%rsp), %rdx
 ; AVX-NEXT:    seto %dil
 ; AVX-NEXT:    movq %rdx, %r8
 ; AVX-NEXT:    sarq $63, %r8
 ; AVX-NEXT:    testb %dil, %dil
 ; AVX-NEXT:    cmovneq %r8, %rsi
-; AVX-NEXT:    xorq %r10, %r8
+; AVX-NEXT:    xorq %r11, %r8
 ; AVX-NEXT:    testb %dil, %dil
 ; AVX-NEXT:    cmoveq %rdx, %r8
 ; AVX-NEXT:    movq %rcx, 16(%rax)
 ; AVX-NEXT:    movq %rsi, (%rax)
-; AVX-NEXT:    movq %r9, 24(%rax)
+; AVX-NEXT:    movq %r10, 24(%rax)
 ; AVX-NEXT:    movq %r8, 8(%rax)
 ; AVX-NEXT:    retq
   %z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll
index 1e9db9f55a8d5c..9538ea1061cd18 100644
--- a/llvm/test/CodeGen/X86/subcarry.ll
+++ b/llvm/test/CodeGen/X86/subcarry.ll
@@ -21,7 +21,7 @@ define i256 @sub256(i256 %a, i256 %b) nounwind {
 ; CHECK-LABEL: sub256:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    subq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT:    subq %r9, %rsi
 ; CHECK-NEXT:    sbbq {{[0-9]+}}(%rsp), %rdx
 ; CHECK-NEXT:    sbbq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    sbbq {{[0-9]+}}(%rsp), %r8
diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
index 1ff95c876a6b17..d744ce6ed6af0b 100644
--- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
@@ -1161,11 +1161,11 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; SSE-LABEL: v2i128:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movq %rdi, %rax
-; SSE-NEXT:    addq {{[0-9]+}}(%rsp), %rsi
+; SSE-NEXT:    addq %r9, %rsi
 ; SSE-NEXT:    adcq {{[0-9]+}}(%rsp), %rdx
 ; SSE-NEXT:    movq $-1, %rdi
-; SSE-NEXT:    cmovbq %rdi, %rdx
 ; SSE-NEXT:    cmovbq %rdi, %rsi
+; SSE-NEXT:    cmovbq %rdi, %rdx
 ; SSE-NEXT:    addq {{[0-9]+}}(%rsp), %rcx
 ; SSE-NEXT:    adcq {{[0-9]+}}(%rsp), %r8
 ; SSE-NEXT:    cmovbq %rdi, %r8
@@ -1179,11 +1179,11 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; AVX-LABEL: v2i128:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movq %rdi, %rax
-; AVX-NEXT:    addq {{[0-9]+}}(%rsp), %rsi
+; AVX-NEXT:    addq %r9, %rsi
 ; AVX-NEXT:    adcq {{[0-9]+}}(%rsp), %rdx
 ; AVX-NEXT:    movq $-1, %rdi
-; AVX-NEXT:    cmovbq %rdi, %rdx
 ; AVX-NEXT:    cmovbq %rdi, %rsi
+; AVX-NEXT:    cmovbq %rdi, %rdx
 ; AVX-NEXT:    addq {{[0-9]+}}(%rsp), %rcx
 ; AVX-NEXT:    adcq {{[0-9]+}}(%rsp), %r8
 ; AVX-NEXT:    cmovbq %rdi, %r8
diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll
index 34eb30dfebeeb2..4e17ca6fbae33b 100644
--- a/llvm/test/CodeGen/X86/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll
@@ -1057,10 +1057,10 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movq %rdi, %rax
 ; SSE-NEXT:    xorl %edi, %edi
-; SSE-NEXT:    subq {{[0-9]+}}(%rsp), %rsi
+; SSE-NEXT:    subq %r9, %rsi
 ; SSE-NEXT:    sbbq {{[0-9]+}}(%rsp), %rdx
-; SSE-NEXT:    cmovbq %rdi, %rdx
 ; SSE-NEXT:    cmovbq %rdi, %rsi
+; SSE-NEXT:    cmovbq %rdi, %rdx
 ; SSE-NEXT:    subq {{[0-9]+}}(%rsp), %rcx
 ; SSE-NEXT:    sbbq {{[0-9]+}}(%rsp), %r8
 ; SSE-NEXT:    cmovbq %rdi, %r8
@@ -1075,10 +1075,10 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movq %rdi, %rax
 ; AVX-NEXT:    xorl %edi, %edi
-; AVX-NEXT:    subq {{[0-9]+}}(%rsp), %rsi
+; AVX-NEXT:    subq %r9, %rsi
 ; AVX-NEXT:    sbbq {{[0-9]+}}(%rsp), %rdx
-; AVX-NEXT:    cmovbq %rdi, %rdx
 ; AVX-NEXT:    cmovbq %rdi, %rsi
+; AVX-NEXT:    cmovbq %rdi, %rdx
 ; AVX-NEXT:    subq {{[0-9]+}}(%rsp), %rcx
 ; AVX-NEXT:    sbbq {{[0-9]+}}(%rsp), %r8
 ; AVX-NEXT:    cmovbq %rdi, %r8
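
For reference, a rough sketch of how the CCIfConsecutiveRegs/CCCustom
entry above expands inside the generated CC_X86_64_C (based on the
CCCustom conventions, not the actual X86GenCallingConv.inc output):

    if (LocVT == MVT::i64 && ArgFlags.isInConsecutiveRegs()) {
      if (CC_X86_64_I128(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
        return false; // handled: part left pending, or all parts assigned
    }

The first i64 half is only queued in the pending list; once the half
carrying isInConsecutiveRegsLast() arrives, AllocateRegBlock either finds
two consecutive free registers for both halves or, if only r9 is left,
returns an empty block and both halves are assigned to the stack without
reserving a shadow register.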

From c0e6abf3e7815c10ef087a7ea3b59e185802f9d4 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 23 Jan 2025 17:36:41 +0100
Subject: [PATCH 2/4] Use size 16

---
 llvm/lib/Target/X86/X86CallingConv.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp
index 85718690b91197..73f3942fc0c34a 100644
--- a/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -365,7 +365,7 @@ static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
       State.addLoc(Pending);
     }
   } else {
-    int64_t Offset = State.AllocateStack(8, Align(16));
+    int64_t Offset = State.AllocateStack(16, Align(16));
     for (auto &Pending : PendingMembers) {
       Pending.convertToMem(Offset);
       State.addLoc(Pending);
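
Why 16 rather than 8 (an illustrative note, not part of the patch):
AllocateStack returns the offset of a fresh slot and advances CCState's
running stack offset by the requested size. Assuming the next free slot
is at offset 0:

    int64_t Offset = State.AllocateStack(16, Align(16)); // reserves [0, 16)
    // the loop then places the low half at offset 0, the high half at 8,
    // and a following stack argument is correctly allocated at offset 16

With AllocateStack(8, Align(16)), only [0, 8) was reserved, so the next
stack argument would have been placed at offset 8, overlapping the
manually addressed high half of the i128.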

From 75ace0e5d546901033961b9053cf2aed288d4e7a Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 24 Jan 2025 10:07:35 +0100
Subject: [PATCH 3/4] Use PendingMembers[0/1] instead of loop

---
 llvm/lib/Target/X86/X86CallingConv.cpp | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp
index 73f3942fc0c34a..859c1366292aca 100644
--- a/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -360,18 +360,15 @@ static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   MCPhysReg Regs[] = {X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9};
   ArrayRef<MCPhysReg> Allocated = State.AllocateRegBlock(Regs, NumRegs);
   if (!Allocated.empty()) {
-    for (const auto &[Pending, Reg] : zip(PendingMembers, Allocated)) {
-      Pending.convertToReg(Reg);
-      State.addLoc(Pending);
-    }
+    PendingMembers[0].convertToReg(Allocated[0]);
+    PendingMembers[1].convertToReg(Allocated[1]);
   } else {
     int64_t Offset = State.AllocateStack(16, Align(16));
-    for (auto &Pending : PendingMembers) {
-      Pending.convertToMem(Offset);
-      State.addLoc(Pending);
-      Offset += 8;
-    }
+    PendingMembers[0].convertToMem(Offset);
+    PendingMembers[1].convertToMem(Offset + 8);
   }
+  State.addLoc(PendingMembers[0]);
+  State.addLoc(PendingMembers[1]);
   PendingMembers.clear();
   return true;
 }

From 86f422215ea959204605d7d494cb0917a3d14ba4 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 24 Jan 2025 11:12:46 +0100
Subject: [PATCH 4/4] Use static const

---
 llvm/lib/Target/X86/X86CallingConv.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp
index 859c1366292aca..0b4c63f7a81f78 100644
--- a/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -357,7 +357,8 @@ static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   unsigned NumRegs = PendingMembers.size();
   assert(NumRegs == 2 && "Should have two parts");
 
-  MCPhysReg Regs[] = {X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9};
+  static const MCPhysReg Regs[] = {X86::RDI, X86::RSI, X86::RDX,
+                                   X86::RCX, X86::R8,  X86::R9};
   ArrayRef<MCPhysReg> Allocated = State.AllocateRegBlock(Regs, NumRegs);
   if (!Allocated.empty()) {
     PendingMembers[0].convertToReg(Allocated[0]);


