[clang] fa1b6e6 - [X86] Fix i128 argument passing under SysV ABI
Nikita Popov via cfe-commits
cfe-commits at lists.llvm.org
Mon Aug 21 02:44:43 PDT 2023
Author: Nikita Popov
Date: 2023-08-21T11:44:35+02:00
New Revision: fa1b6e6b34eb6382c451f3a06a7c52d7ac6ada1d
URL: https://github.com/llvm/llvm-project/commit/fa1b6e6b34eb6382c451f3a06a7c52d7ac6ada1d
DIFF: https://github.com/llvm/llvm-project/commit/fa1b6e6b34eb6382c451f3a06a7c52d7ac6ada1d.diff
LOG: [X86] Fix i128 argument passing under SysV ABI
The x86_64 SysV ABI specifies that __int128 is passed either in
two registers (if available) or in a 16 byte aligned stack slot.
GCC implements this behavior. However, if only one free register
is available, LLVM will instead pass one half of the i128 in a
register, and the other on the stack.
Make sure that either both are passed in registers or both on the
stack.
Fixes https://github.com/llvm/llvm-project/issues/41784.
The patch is basically what craig.topper proposed to do there.
Differential Revision: https://reviews.llvm.org/D158169
Added:
Modified:
clang/docs/ReleaseNotes.rst
llvm/lib/Target/X86/X86CallingConv.td
llvm/test/CodeGen/X86/addcarry.ll
llvm/test/CodeGen/X86/i128-abi.ll
llvm/test/CodeGen/X86/sadd_sat_vec.ll
llvm/test/CodeGen/X86/ssub_sat_vec.ll
llvm/test/CodeGen/X86/subcarry.ll
llvm/test/CodeGen/X86/uadd_sat_vec.ll
llvm/test/CodeGen/X86/usub_sat_vec.ll
Removed:
################################################################################
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 94418a08671d78..57f945f2668217 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -46,6 +46,8 @@ C++ Specific Potentially Breaking Changes
ABI Changes in This Version
---------------------------
+- Following the SystemV ABI for x86-64, ``__int128`` arguments will no longer
+ be split between a register and a stack slot.
What's New in Clang |release|?
==============================
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 3ce59dc4aa61bd..19a295cd109627 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -572,6 +572,14 @@ def CC_X86_64_C : CallingConv<[
// The first 6 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
+
+ // i128 can be either passed in two i64 registers, or on the stack, but
+ // not split across register and stack. As such, do not allow using R9
+ // for a split i64.
+ CCIfType<[i64],
+ CCIfSplit<CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
+ CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [R9]>>>,
+
CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
// The first 8 MMX vector arguments are passed in XMM registers on Darwin.
diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
index af8f921ef97733..231645f6415919 100644
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -48,7 +48,7 @@ define i256 @add256(i256 %a, i256 %b) nounwind {
; CHECK-LABEL: add256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: addq %r9, %rsi
+; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8
diff --git a/llvm/test/CodeGen/X86/i128-abi.ll b/llvm/test/CodeGen/X86/i128-abi.ll
index 1d1df9d592d72b..d1d6f86e08fb8b 100644
--- a/llvm/test/CodeGen/X86/i128-abi.ll
+++ b/llvm/test/CodeGen/X86/i128-abi.ll
@@ -13,8 +13,8 @@ define i128 @in_reg(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i128 %a4) {
define i128 @on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5) {
; CHECK-LABEL: on_stack:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq %r9, %rax
-; CHECK-NEXT: movq 8(%rsp), %rdx
+; CHECK-NEXT: movq 8(%rsp), %rax
+; CHECK-NEXT: movq 16(%rsp), %rdx
; CHECK-NEXT: retq
ret i128 %a5
}
@@ -22,7 +22,7 @@ define i128 @on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5) {
define i64 @trailing_arg_on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i64 %a6) {
; CHECK-LABEL: trailing_arg_on_stack:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq 16(%rsp), %rax
+; CHECK-NEXT: movq 24(%rsp), %rax
; CHECK-NEXT: retq
ret i64 %a6
}
@@ -48,14 +48,17 @@ define void @call_on_stack(i128 %x) nounwind {
; CHECK-LABEL: call_on_stack:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: movq %rdi, %r9
-; CHECK-NEXT: movq %rsi, (%rsp)
; CHECK-NEXT: movl $1, %esi
; CHECK-NEXT: movl $2, %edx
; CHECK-NEXT: movl $3, %ecx
; CHECK-NEXT: movl $4, %r8d
; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: pushq %r9
; CHECK-NEXT: callq on_stack@PLT
+; CHECK-NEXT: addq $16, %rsp
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
call i128 @on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x)
@@ -67,17 +70,19 @@ define void @call_trailing_arg_on_stack(i128 %x, i64 %y) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movq %rdx, %rax
-; CHECK-NEXT: movq %rsi, %r10
-; CHECK-NEXT: movq %rdi, %r9
+; CHECK-NEXT: movq %rsi, %r9
+; CHECK-NEXT: movq %rdi, %r10
+; CHECK-NEXT: subq $8, %rsp
; CHECK-NEXT: movl $1, %esi
; CHECK-NEXT: movl $2, %edx
; CHECK-NEXT: movl $3, %ecx
; CHECK-NEXT: movl $4, %r8d
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: pushq %r9
; CHECK-NEXT: pushq %r10
; CHECK-NEXT: callq trailing_arg_on_stack@PLT
-; CHECK-NEXT: addq $16, %rsp
+; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
call i128 @trailing_arg_on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x, i64 %y)
diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
index 8d914ba81a096a..45a8a6fd5449af 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -1795,27 +1795,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; SSE-NEXT: seto %dil
-; SSE-NEXT: movq %r8, %r10
-; SSE-NEXT: sarq $63, %r10
+; SSE-NEXT: movq %r8, %r9
+; SSE-NEXT: sarq $63, %r9
; SSE-NEXT: testb %dil, %dil
-; SSE-NEXT: cmovneq %r10, %rcx
-; SSE-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
-; SSE-NEXT: xorq %r11, %r10
+; SSE-NEXT: cmovneq %r9, %rcx
+; SSE-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
+; SSE-NEXT: xorq %r10, %r9
; SSE-NEXT: testb %dil, %dil
-; SSE-NEXT: cmoveq %r8, %r10
-; SSE-NEXT: addq %r9, %rsi
+; SSE-NEXT: cmoveq %r8, %r9
+; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rsi
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT: seto %dil
; SSE-NEXT: movq %rdx, %r8
; SSE-NEXT: sarq $63, %r8
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmovneq %r8, %rsi
-; SSE-NEXT: xorq %r11, %r8
+; SSE-NEXT: xorq %r10, %r8
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmoveq %rdx, %r8
; SSE-NEXT: movq %rcx, 16(%rax)
; SSE-NEXT: movq %rsi, (%rax)
-; SSE-NEXT: movq %r10, 24(%rax)
+; SSE-NEXT: movq %r9, 24(%rax)
; SSE-NEXT: movq %r8, 8(%rax)
; SSE-NEXT: retq
;
@@ -1825,27 +1825,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; AVX-NEXT: seto %dil
-; AVX-NEXT: movq %r8, %r10
-; AVX-NEXT: sarq $63, %r10
+; AVX-NEXT: movq %r8, %r9
+; AVX-NEXT: sarq $63, %r9
; AVX-NEXT: testb %dil, %dil
-; AVX-NEXT: cmovneq %r10, %rcx
-; AVX-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
-; AVX-NEXT: xorq %r11, %r10
+; AVX-NEXT: cmovneq %r9, %rcx
+; AVX-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
+; AVX-NEXT: xorq %r10, %r9
; AVX-NEXT: testb %dil, %dil
-; AVX-NEXT: cmoveq %r8, %r10
-; AVX-NEXT: addq %r9, %rsi
+; AVX-NEXT: cmoveq %r8, %r9
+; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rsi
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: seto %dil
; AVX-NEXT: movq %rdx, %r8
; AVX-NEXT: sarq $63, %r8
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmovneq %r8, %rsi
-; AVX-NEXT: xorq %r11, %r8
+; AVX-NEXT: xorq %r10, %r8
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmoveq %rdx, %r8
; AVX-NEXT: movq %rcx, 16(%rax)
; AVX-NEXT: movq %rsi, (%rax)
-; AVX-NEXT: movq %r10, 24(%rax)
+; AVX-NEXT: movq %r9, 24(%rax)
; AVX-NEXT: movq %r8, 8(%rax)
; AVX-NEXT: retq
%z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
index 14f1985c60ff6b..d99d5aaa875365 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
@@ -2026,27 +2026,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
; SSE-NEXT: seto %dil
-; SSE-NEXT: movq %r8, %r10
-; SSE-NEXT: sarq $63, %r10
+; SSE-NEXT: movq %r8, %r9
+; SSE-NEXT: sarq $63, %r9
; SSE-NEXT: testb %dil, %dil
-; SSE-NEXT: cmovneq %r10, %rcx
-; SSE-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
-; SSE-NEXT: xorq %r11, %r10
+; SSE-NEXT: cmovneq %r9, %rcx
+; SSE-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
+; SSE-NEXT: xorq %r10, %r9
; SSE-NEXT: testb %dil, %dil
-; SSE-NEXT: cmoveq %r8, %r10
-; SSE-NEXT: subq %r9, %rsi
+; SSE-NEXT: cmoveq %r8, %r9
+; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rsi
; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT: seto %dil
; SSE-NEXT: movq %rdx, %r8
; SSE-NEXT: sarq $63, %r8
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmovneq %r8, %rsi
-; SSE-NEXT: xorq %r11, %r8
+; SSE-NEXT: xorq %r10, %r8
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmoveq %rdx, %r8
; SSE-NEXT: movq %rcx, 16(%rax)
; SSE-NEXT: movq %rsi, (%rax)
-; SSE-NEXT: movq %r10, 24(%rax)
+; SSE-NEXT: movq %r9, 24(%rax)
; SSE-NEXT: movq %r8, 8(%rax)
; SSE-NEXT: retq
;
@@ -2056,27 +2056,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
; AVX-NEXT: seto %dil
-; AVX-NEXT: movq %r8, %r10
-; AVX-NEXT: sarq $63, %r10
+; AVX-NEXT: movq %r8, %r9
+; AVX-NEXT: sarq $63, %r9
; AVX-NEXT: testb %dil, %dil
-; AVX-NEXT: cmovneq %r10, %rcx
-; AVX-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
-; AVX-NEXT: xorq %r11, %r10
+; AVX-NEXT: cmovneq %r9, %rcx
+; AVX-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
+; AVX-NEXT: xorq %r10, %r9
; AVX-NEXT: testb %dil, %dil
-; AVX-NEXT: cmoveq %r8, %r10
-; AVX-NEXT: subq %r9, %rsi
+; AVX-NEXT: cmoveq %r8, %r9
+; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rsi
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: seto %dil
; AVX-NEXT: movq %rdx, %r8
; AVX-NEXT: sarq $63, %r8
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmovneq %r8, %rsi
-; AVX-NEXT: xorq %r11, %r8
+; AVX-NEXT: xorq %r10, %r8
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmoveq %rdx, %r8
; AVX-NEXT: movq %rcx, 16(%rax)
; AVX-NEXT: movq %rsi, (%rax)
-; AVX-NEXT: movq %r10, 24(%rax)
+; AVX-NEXT: movq %r9, 24(%rax)
; AVX-NEXT: movq %r8, 8(%rax)
; AVX-NEXT: retq
%z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll
index 9538ea1061cd18..1e9db9f55a8d5c 100644
--- a/llvm/test/CodeGen/X86/subcarry.ll
+++ b/llvm/test/CodeGen/X86/subcarry.ll
@@ -21,7 +21,7 @@ define i256 @sub256(i256 %a, i256 %b) nounwind {
; CHECK-LABEL: sub256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: subq %r9, %rsi
+; CHECK-NEXT: subq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
index f97603ebea92b6..50c73009314a9d 100644
--- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
@@ -1161,11 +1161,11 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE-LABEL: v2i128:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
-; SSE-NEXT: addq %r9, %rsi
+; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rsi
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT: movq $-1, %rdi
-; SSE-NEXT: cmovbq %rdi, %rsi
; SSE-NEXT: cmovbq %rdi, %rdx
+; SSE-NEXT: cmovbq %rdi, %rsi
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; SSE-NEXT: cmovbq %rdi, %r8
@@ -1179,11 +1179,11 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; AVX-LABEL: v2i128:
; AVX: # %bb.0:
; AVX-NEXT: movq %rdi, %rax
-; AVX-NEXT: addq %r9, %rsi
+; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rsi
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: movq $-1, %rdi
-; AVX-NEXT: cmovbq %rdi, %rsi
; AVX-NEXT: cmovbq %rdi, %rdx
+; AVX-NEXT: cmovbq %rdi, %rsi
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; AVX-NEXT: cmovbq %rdi, %r8
diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll
index a9cf02991d4282..8823b98c4ff8a6 100644
--- a/llvm/test/CodeGen/X86/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll
@@ -1057,10 +1057,10 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: xorl %edi, %edi
-; SSE-NEXT: subq %r9, %rsi
+; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rsi
; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
-; SSE-NEXT: cmovbq %rdi, %rsi
; SSE-NEXT: cmovbq %rdi, %rdx
+; SSE-NEXT: cmovbq %rdi, %rsi
; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
; SSE-NEXT: cmovbq %rdi, %r8
@@ -1075,10 +1075,10 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; AVX: # %bb.0:
; AVX-NEXT: movq %rdi, %rax
; AVX-NEXT: xorl %edi, %edi
-; AVX-NEXT: subq %r9, %rsi
+; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rsi
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
-; AVX-NEXT: cmovbq %rdi, %rsi
; AVX-NEXT: cmovbq %rdi, %rdx
+; AVX-NEXT: cmovbq %rdi, %rsi
; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
; AVX-NEXT: cmovbq %rdi, %r8
More information about the cfe-commits
mailing list