[llvm] 40cd26c - [Win64] Handle FP arguments more gracefully under -mno-sse
Reid Kleckner via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 14 17:19:41 PST 2020
Author: Reid Kleckner
Date: 2020-01-14T17:19:35-08:00
New Revision: 40cd26c7008183e01d8276396339aea2a99d83d7
URL: https://github.com/llvm/llvm-project/commit/40cd26c7008183e01d8276396339aea2a99d83d7
DIFF: https://github.com/llvm/llvm-project/commit/40cd26c7008183e01d8276396339aea2a99d83d7.diff
LOG: [Win64] Handle FP arguments more gracefully under -mno-sse
Pass small FP values in GPRs or stack memory according to the normal
convention. This is what gcc -mno-sse does on Win64.
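As a minimal sketch of the new lowering (the function name is
illustrative; the added no-sse-win64.ll test below covers the same
cases):

  ; llc -mtriple=x86_64-windows-gnu -mattr=-sse
  ; The f64 argument is bitconverted to i64 and arrives in RCX; the
  ; result is returned in RAX, matching gcc -mno-sse.
  define double @copy_double(double %v) {
    ret double %v        ; expected lowering: movq %rcx, %rax / retq
  }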
I adjusted the conditions under which we emit an error to check if the
argument or return value would be passed in an XMM register when SSE is
disabled. This has a side effect of no longer emitting an error for FP
arguments marked 'inreg' when targeting x86 with SSE disabled. Our
calling convention logic was already assigning them to FP0/FP1, and then
we emitted this error. That seems unnecessary; we can ignore 'inreg' and
compile it without SSE.
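For example, IR along these lines (a minimal sketch mirroring the added
no-sse-x86.ll test; names are illustrative) now compiles instead of
emitting "SSE register return with SSE disabled":

  ; llc -mtriple=i386-unknown-linux-gnu -mattr=-sse
  declare inreg float @ext_float(float inreg)

  define void @caller(float* %p) nounwind {
    ; The inreg call result now simply comes back on the x87 stack (FP0).
    %v = load float, float* %p
    %r = call inreg float @ext_float(float inreg %v)
    store float %r, float* %p
    ret void
  }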
Reviewers: jyknight, aemerson
Differential Revision: https://reviews.llvm.org/D70465
Added:
llvm/test/CodeGen/X86/no-sse-win64.ll
llvm/test/CodeGen/X86/no-sse-x86.ll
Modified:
llvm/lib/Target/X86/X86CallingConv.td
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
llvm/test/CodeGen/X86/nosse-error2.ll
################################################################################
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 30d05c638146..db1aef2fd09d 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -346,6 +346,10 @@ def RetCC_X86_Win64_C : CallingConv<[
// The X86-Win64 calling convention always returns __m64 values in RAX.
CCIfType<[x86mmx], CCBitConvertToType<i64>>,
+ // GCC returns FP values in RAX on Win64.
+ CCIfType<[f32], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i32>>>,
+ CCIfType<[f64], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i64>>>,
+
// Otherwise, everything is the same as 'normal' X86-64 C CC.
CCDelegateTo<RetCC_X86_64_C>
]>;
@@ -613,7 +617,6 @@ def CC_X86_Win64_C : CallingConv<[
// 128 bit vectors are passed by pointer
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
-
// 256 bit vectors are passed by pointer
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,
@@ -626,6 +629,16 @@ def CC_X86_Win64_C : CallingConv<[
// The first 4 MMX vector arguments are passed in GPRs.
CCIfType<[x86mmx], CCBitConvertToType<i64>>,
+ // If SSE is disabled, pass FP values no wider than 64 bits as integers in
+ // GPRs or on the stack.
+ CCIfType<[f32], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i32>>>,
+ CCIfType<[f64], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i64>>>,
+
+ // The first 4 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64],
+ CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
+ [RCX , RDX , R8 , R9 ]>>,
+
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i8 ], CCAssignToRegWithShadow<[CL , DL , R8B , R9B ],
[XMM0, XMM1, XMM2, XMM3]>>,
@@ -643,11 +656,6 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
[XMM0, XMM1, XMM2, XMM3]>>,
- // The first 4 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
- [RCX , RDX , R8 , R9 ]>>,
-
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i8, i16, i32, i64, f32, f64], CCAssignToStack<8, 8>>
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 664c6b9af767..d7593f525731 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2693,18 +2693,16 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(VA.getLocInfo() != CCValAssign::FPExt &&
"Unexpected FP-extend for return value.");
- // If this is x86-64, and we disabled SSE, we can't return FP values,
- // or SSE or MMX vectors.
- if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
- VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
- (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
+ // Report an error if we have attempted to return a value via an XMM
+ // register and SSE was disabled.
+ if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
- } else if (ValVT == MVT::f64 &&
- (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
- // Likewise we can't return F64 values with SSE1 only. gcc does so, but
- // llvm-gcc has never done it right and no one has noticed, so this
- // should be OK for now.
+ } else if (!Subtarget.hasSSE2() &&
+ X86::FR64XRegClass.contains(VA.getLocReg()) &&
+ ValVT == MVT::f64) {
+ // When returning a double via an XMM register, report an error if SSE2 is
+ // not enabled.
errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
}
@@ -2999,7 +2997,6 @@ SDValue X86TargetLowering::LowerCallResult(
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- bool Is64Bit = Subtarget.is64Bit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
@@ -3018,16 +3015,17 @@ SDValue X86TargetLowering::LowerCallResult(
RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
}
- // If this is x86-64, and we disabled SSE, we can't return FP values
- if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
- ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
+ // Report an error if there was an attempt to return FP values via XMM
+ // registers.
+ if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
if (VA.getLocReg() == X86::XMM1)
VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
else
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
- } else if (CopyVT == MVT::f64 &&
- (Is64Bit && !Subtarget.hasSSE2())) {
+ } else if (!Subtarget.hasSSE2() &&
+ X86::FR64XRegClass.contains(VA.getLocReg()) &&
+ CopyVT == MVT::f64) {
errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
if (VA.getLocReg() == X86::XMM1)
VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
@@ -3074,6 +3072,9 @@ SDValue X86TargetLowering::LowerCallResult(
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
}
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ Val = DAG.getBitcast(VA.getValVT(), Val);
+
InVals.push_back(Val);
}
diff --git a/llvm/test/CodeGen/X86/no-sse-win64.ll b/llvm/test/CodeGen/X86/no-sse-win64.ll
new file mode 100644
index 000000000000..c220b9606129
--- /dev/null
+++ b/llvm/test/CodeGen/X86/no-sse-win64.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-windows-msvc < %s -mattr=-sse | FileCheck %s
+; RUN: llc -mtriple=x86_64-windows-gnu < %s -mattr=-sse | FileCheck %s
+
+define void @recv_double(double %v, double* %p) {
+; CHECK-LABEL: recv_double:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rcx, (%rdx)
+; CHECK-NEXT: retq
+ store double %v, double* %p
+ ret void
+}
+
+define void @recv_float(float %v, float* %p) {
+; CHECK-LABEL: recv_float:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %ecx, (%rdx)
+; CHECK-NEXT: retq
+ store float %v, float* %p
+ ret void
+}
+
+define dso_local double @ret_double(double* %p) {
+; CHECK-LABEL: ret_double:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq (%rcx), %rax
+; CHECK-NEXT: retq
+entry:
+ %v = load double, double* %p
+ ret double %v
+}
+
+define dso_local float @ret_float(float* %p) {
+; CHECK-LABEL: ret_float:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl (%rcx), %eax
+; CHECK-NEXT: retq
+entry:
+ %v = load float, float* %p
+ ret float %v
+}
+
+declare void @take_double(double)
+declare void @take_float(float)
+
+define void @pass_double(double* %p) {
+; CHECK-LABEL: pass_double:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .seh_stackalloc 40
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: movq (%rcx), %rcx
+; CHECK-NEXT: callq take_double
+; CHECK-NEXT: nop
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+; CHECK-NEXT: .seh_handlerdata
+; CHECK-NEXT: .text
+; CHECK-NEXT: .seh_endproc
+ %v = load double, double* %p
+ call void @take_double(double %v)
+ ret void
+}
+
+define void @pass_float(float* %p) {
+; CHECK-LABEL: pass_float:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .seh_stackalloc 40
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: movl (%rcx), %ecx
+; CHECK-NEXT: callq take_float
+; CHECK-NEXT: nop
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+; CHECK-NEXT: .seh_handlerdata
+; CHECK-NEXT: .text
+; CHECK-NEXT: .seh_endproc
+ %v = load float, float* %p
+ call void @take_float(float %v)
+ ret void
+}
+
+declare double @produce_double()
+declare float @produce_float()
+
+define void @call_double(double* %p) {
+; CHECK-LABEL: call_double:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rsi
+; CHECK-NEXT: .seh_pushreg %rsi
+; CHECK-NEXT: subq $32, %rsp
+; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: callq produce_double
+; CHECK-NEXT: movq %rax, (%rsi)
+; CHECK-NEXT: addq $32, %rsp
+; CHECK-NEXT: popq %rsi
+; CHECK-NEXT: retq
+; CHECK-NEXT: .seh_handlerdata
+; CHECK-NEXT: .text
+; CHECK-NEXT: .seh_endproc
+ %v = call double @produce_double()
+ store double %v, double* %p
+ ret void
+}
+
+define void @call_float(float* %p) {
+; CHECK-LABEL: call_float:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rsi
+; CHECK-NEXT: .seh_pushreg %rsi
+; CHECK-NEXT: subq $32, %rsp
+; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: callq produce_float
+; CHECK-NEXT: movl %eax, (%rsi)
+; CHECK-NEXT: addq $32, %rsp
+; CHECK-NEXT: popq %rsi
+; CHECK-NEXT: retq
+; CHECK-NEXT: .seh_handlerdata
+; CHECK-NEXT: .text
+; CHECK-NEXT: .seh_endproc
+ %v = call float @produce_float()
+ store float %v, float* %p
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/no-sse-x86.ll b/llvm/test/CodeGen/X86/no-sse-x86.ll
new file mode 100644
index 000000000000..45fea53af7fb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/no-sse-x86.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=i686 -mattr=+sse | FileCheck %s
+; RUN: llc < %s -mcpu=i686 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-unknown-linux-gnu"
+@f = external global float
+@d = external global double
+
+define void @test() nounwind {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subl $12, %esp
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss %xmm0, (%esp)
+; CHECK-NEXT: calll foo1
+; CHECK-NEXT: fstps f
+; CHECK-NEXT: fldl d
+; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: calll foo2
+; CHECK-NEXT: fstpl d
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss %xmm0, (%esp)
+; CHECK-NEXT: calll foo3
+; CHECK-NEXT: fstps f
+; CHECK-NEXT: fldl d
+; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: calll foo4
+; CHECK-NEXT: fstpl d
+; CHECK-NEXT: addl $12, %esp
+; CHECK-NEXT: retl
+;
+; NOSSE-LABEL: test:
+; NOSSE: # %bb.0: # %entry
+; NOSSE-NEXT: subl $12, %esp
+; NOSSE-NEXT: flds f
+; NOSSE-NEXT: fstps (%esp)
+; NOSSE-NEXT: calll foo1
+; NOSSE-NEXT: fstps f
+; NOSSE-NEXT: fldl d
+; NOSSE-NEXT: fstpl (%esp)
+; NOSSE-NEXT: calll foo2
+; NOSSE-NEXT: fstpl d
+; NOSSE-NEXT: flds f
+; NOSSE-NEXT: fstps (%esp)
+; NOSSE-NEXT: calll foo3
+; NOSSE-NEXT: fstps f
+; NOSSE-NEXT: fldl d
+; NOSSE-NEXT: fstpl (%esp)
+; NOSSE-NEXT: calll foo4
+; NOSSE-NEXT: fstpl d
+; NOSSE-NEXT: addl $12, %esp
+; NOSSE-NEXT: retl
+entry:
+ %0 = load float, float* @f, align 4
+ %1 = tail call inreg float @foo1(float inreg %0) nounwind
+ store float %1, float* @f, align 4
+ %2 = load double, double* @d, align 8
+ %3 = tail call inreg double @foo2(double inreg %2) nounwind
+ store double %3, double* @d, align 8
+ %4 = load float, float* @f, align 4
+ %5 = tail call inreg float @foo3(float inreg %4) nounwind
+ store float %5, float* @f, align 4
+ %6 = load double, double* @d, align 8
+ %7 = tail call inreg double @foo4(double inreg %6) nounwind
+ store double %7, double* @d, align 8
+ ret void
+}
+
+declare inreg float @foo1(float inreg)
+
+declare inreg double @foo2(double inreg)
+
+declare inreg float @foo3(float inreg)
+
+declare inreg double @foo4(double inreg)
diff --git a/llvm/test/CodeGen/X86/nosse-error2.ll b/llvm/test/CodeGen/X86/nosse-error2.ll
deleted file mode 100644
index b88ddf85e0ef..000000000000
--- a/llvm/test/CodeGen/X86/nosse-error2.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: not llc < %s -mcpu=i686 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
-; RUN: llc < %s -mcpu=i686 -mattr=+sse | FileCheck %s
-
-; NOSSE: {{SSE register return with SSE disabled}}
-
-; CHECK: xmm
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-unknown-linux-gnu"
-@f = external global float ; <float*> [#uses=4]
-@d = external global double ; <double*> [#uses=4]
-
-define void @test() nounwind {
-entry:
- %0 = load float, float* @f, align 4 ; <float> [#uses=1]
- %1 = tail call inreg float @foo1(float inreg %0) nounwind ; <float> [#uses=1]
- store float %1, float* @f, align 4
- %2 = load double, double* @d, align 8 ; <double> [#uses=1]
- %3 = tail call inreg double @foo2(double inreg %2) nounwind ; <double> [#uses=1]
- store double %3, double* @d, align 8
- %4 = load float, float* @f, align 4 ; <float> [#uses=1]
- %5 = tail call inreg float @foo3(float inreg %4) nounwind ; <float> [#uses=1]
- store float %5, float* @f, align 4
- %6 = load double, double* @d, align 8 ; <double> [#uses=1]
- %7 = tail call inreg double @foo4(double inreg %6) nounwind ; <double> [#uses=1]
- store double %7, double* @d, align 8
- ret void
-}
-
-declare inreg float @foo1(float inreg)
-
-declare inreg double @foo2(double inreg)
-
-declare inreg float @foo3(float inreg)
-
-declare inreg double @foo4(double inreg)