[llvm] r343689 - [X86] Correctly use SSE registers if no-x87 is selected.
Nirav Dave via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 3 07:13:30 PDT 2018
Author: niravd
Date: Wed Oct 3 07:13:30 2018
New Revision: 343689
URL: http://llvm.org/viewvc/llvm-project?rev=343689&view=rev
Log:
[X86] Correctly use SSE registers if no-x87 is selected.
Fix the use of SSE1 registers for f32 ops in no-x87 mode.
Notably, allow the use of SSE instructions for f32 operations in 64-bit
mode (but not in 32-bit mode, where this is disallowed by the calling convention).
Also avoid translating memset/memcpy/memmove into SSE registers
in 32-bit mode without x87.
This fixes PR38738.
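For reference, a minimal reproducer sketch in the spirit of PR38738 (the
function name @addf32 and the file name reduce.ll below are illustrative, not
part of the committed tests; the -mattr flags match the new RUN lines). With
SSE enabled but x87 disabled on x86-64, an f32 add like this should now be
selected onto an SSE register (addss) rather than failing for lack of a legal
f32 register class:

  ; llc -mtriple=x86_64-unknown-linux-gnu -mattr=-x87,+sse -o - reduce.ll
  define float @addf32(float %a, float %b) {
  entry:
    %sum = fadd float %a, %b
    ret float %sum
  }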
Reviewers: nickdesaulniers, craig.topper
Subscribers: hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D52555
Added:
llvm/trunk/test/CodeGen/X86/pr38738.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/x87.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=343689&r1=343688&r2=343689&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Oct 3 07:13:30 2018
@@ -543,15 +543,12 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
- // Expand FP immediates into loads from the stack, except for the special
- // cases we handle.
- addLegalFPImmediate(APFloat(+0.0)); // xorpd
- addLegalFPImmediate(APFloat(+0.0f)); // xorps
- } else if (UseX87 && X86ScalarSSEf32) {
+ } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
- addRegisterClass(MVT::f64, &X86::RFP64RegClass);
+ if (UseX87)
+ addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
setOperationAction(ISD::FABS , MVT::f32, Custom);
@@ -559,10 +556,12 @@ X86TargetLowering::X86TargetLowering(con
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f32, Custom);
- setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+ if (UseX87)
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
// Use ANDPS and ORPS to simulate FCOPYSIGN.
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ if (UseX87)
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
@@ -570,17 +569,12 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
- // Special cases we handle for FP constants.
- addLegalFPImmediate(APFloat(+0.0f)); // xorps
- addLegalFPImmediate(APFloat(+0.0)); // FLD0
- addLegalFPImmediate(APFloat(+1.0)); // FLD1
- addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
- addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
-
- // Always expand sin/cos functions even though x87 has an instruction.
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ if (UseX87) {
+ // Always expand sin/cos functions even though x87 has an instruction.
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ }
} else if (UseX87) {
// f32 and f64 in x87.
// Set up the FP register classes.
@@ -596,14 +590,27 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
- addLegalFPImmediate(APFloat(+0.0)); // FLD0
- addLegalFPImmediate(APFloat(+1.0)); // FLD1
- addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
- addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- addLegalFPImmediate(APFloat(+0.0f)); // FLD0
- addLegalFPImmediate(APFloat(+1.0f)); // FLD1
- addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
- addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ }
+
+ // Expand FP32 immediates into loads from the stack, save special cases.
+ if (isTypeLegal(MVT::f32)) {
+ if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
+ addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ } else // SSE immediates.
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ }
+ // Expand FP64 immediates into loads from the stack, save special cases.
+ if (isTypeLegal(MVT::f64)) {
+ if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+ } else // SSE immediates.
+ addLegalFPImmediate(APFloat(+0.0)); // xorpd
}
// We don't support FMA.
@@ -1936,7 +1943,8 @@ X86TargetLowering::getOptimalMemOpType(u
if (Subtarget.hasSSE2())
return MVT::v16i8;
// TODO: Can SSE1 handle a byte vector?
- if (Subtarget.hasSSE1())
+ // If we have SSE1 registers we should be able to use them.
+ if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()))
return MVT::v4f32;
} else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
!Subtarget.is64Bit() && Subtarget.hasSSE2()) {
Added: llvm/trunk/test/CodeGen/X86/pr38738.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr38738.ll?rev=343689&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr38738.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pr38738.ll Wed Oct 3 07:13:30 2018
@@ -0,0 +1,254 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -mattr=-x87,+sse,-sse2 %s | FileCheck --check-prefixes=X64SSE %s
+; RUN: llc -mtriple=i686-unknown-linux-gnu -o - -mattr=-x87,+sse,-sse2 %s | FileCheck --check-prefixes=X86SSE %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -mattr=-x87,+sse2,-sse3 %s | FileCheck --check-prefixes=X64SSE2 %s
+; RUN: llc -mtriple=i686-unknown-linux-gnu -o - -mattr=-x87,+sse2,-sse3 %s | FileCheck --check-prefixes=X86SSE2 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -mattr=-x87,+avx,-avx2 %s | FileCheck --check-prefixes=X64AVX %s
+; RUN: llc -mtriple=i686-unknown-linux-gnu -o - -mattr=-x87,+avx,-avx2 %s | FileCheck --check-prefixes=X86AVX %s
+
+
+%struct.params = type { double, double }
+
+define i32 @pr38738() {
+; X64SSE-LABEL: pr38738:
+; X64SSE: # %bb.0: # %entry
+; X64SSE-NEXT: xorps %xmm0, %xmm0
+; X64SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64SSE-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+; X64SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X64SSE-NEXT: retq
+;
+; X86SSE-LABEL: pr38738:
+; X86SSE: # %bb.0: # %entry
+; X86SSE-NEXT: subl $28, %esp
+; X86SSE-NEXT: .cfi_def_cfa_offset 32
+; X86SSE-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86SSE-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86SSE-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86SSE-NEXT: movl $0, (%esp)
+; X86SSE-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86SSE-NEXT: addl $28, %esp
+; X86SSE-NEXT: .cfi_def_cfa_offset 4
+; X86SSE-NEXT: retl
+;
+; X64SSE2-LABEL: pr38738:
+; X64SSE2: # %bb.0: # %entry
+; X64SSE2-NEXT: xorps %xmm0, %xmm0
+; X64SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64SSE2-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+; X64SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X64SSE2-NEXT: retq
+;
+; X86SSE2-LABEL: pr38738:
+; X86SSE2: # %bb.0: # %entry
+; X86SSE2-NEXT: subl $44, %esp
+; X86SSE2-NEXT: .cfi_def_cfa_offset 48
+; X86SSE2-NEXT: xorps %xmm0, %xmm0
+; X86SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
+; X86SSE2-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86SSE2-NEXT: addl $44, %esp
+; X86SSE2-NEXT: .cfi_def_cfa_offset 4
+; X86SSE2-NEXT: retl
+;
+; X64AVX-LABEL: pr38738:
+; X64AVX: # %bb.0: # %entry
+; X64AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X64AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64AVX-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+; X64AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X64AVX-NEXT: retq
+;
+; X86AVX-LABEL: pr38738:
+; X86AVX: # %bb.0: # %entry
+; X86AVX-NEXT: subl $44, %esp
+; X86AVX-NEXT: .cfi_def_cfa_offset 48
+; X86AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X86AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
+; X86AVX-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86AVX-NEXT: addl $44, %esp
+; X86AVX-NEXT: .cfi_def_cfa_offset 4
+; X86AVX-NEXT: retl
+entry:
+ %retval = alloca i32, align 4
+ %dlg_sys_param = alloca %struct.params, align 8
+ %total_active_bw = alloca float, align 4
+ %0 = bitcast %struct.params* %dlg_sys_param to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 16, i1 false)
+ store float 0.000000e+00, float* %total_active_bw, align 4
+ %1 = load i32, i32* %retval, align 4
+ ret i32 %1
+}
+
+define void @tryset(i8* nocapture %x) {
+; X64SSE-LABEL: tryset:
+; X64SSE: # %bb.0:
+; X64SSE-NEXT: movq $0, 56(%rdi)
+; X64SSE-NEXT: movq $0, 48(%rdi)
+; X64SSE-NEXT: movq $0, 40(%rdi)
+; X64SSE-NEXT: movq $0, 32(%rdi)
+; X64SSE-NEXT: movq $0, 24(%rdi)
+; X64SSE-NEXT: movq $0, 16(%rdi)
+; X64SSE-NEXT: movq $0, 8(%rdi)
+; X64SSE-NEXT: movq $0, (%rdi)
+; X64SSE-NEXT: retq
+;
+; X86SSE-LABEL: tryset:
+; X86SSE: # %bb.0:
+; X86SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86SSE-NEXT: movl $0, 60(%eax)
+; X86SSE-NEXT: movl $0, 56(%eax)
+; X86SSE-NEXT: movl $0, 52(%eax)
+; X86SSE-NEXT: movl $0, 48(%eax)
+; X86SSE-NEXT: movl $0, 44(%eax)
+; X86SSE-NEXT: movl $0, 40(%eax)
+; X86SSE-NEXT: movl $0, 36(%eax)
+; X86SSE-NEXT: movl $0, 32(%eax)
+; X86SSE-NEXT: movl $0, 28(%eax)
+; X86SSE-NEXT: movl $0, 24(%eax)
+; X86SSE-NEXT: movl $0, 20(%eax)
+; X86SSE-NEXT: movl $0, 16(%eax)
+; X86SSE-NEXT: movl $0, 12(%eax)
+; X86SSE-NEXT: movl $0, 8(%eax)
+; X86SSE-NEXT: movl $0, 4(%eax)
+; X86SSE-NEXT: movl $0, (%eax)
+; X86SSE-NEXT: retl
+;
+; X64SSE2-LABEL: tryset:
+; X64SSE2: # %bb.0:
+; X64SSE2-NEXT: movq $0, 56(%rdi)
+; X64SSE2-NEXT: movq $0, 48(%rdi)
+; X64SSE2-NEXT: movq $0, 40(%rdi)
+; X64SSE2-NEXT: movq $0, 32(%rdi)
+; X64SSE2-NEXT: movq $0, 24(%rdi)
+; X64SSE2-NEXT: movq $0, 16(%rdi)
+; X64SSE2-NEXT: movq $0, 8(%rdi)
+; X64SSE2-NEXT: movq $0, (%rdi)
+; X64SSE2-NEXT: retq
+;
+; X86SSE2-LABEL: tryset:
+; X86SSE2: # %bb.0:
+; X86SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86SSE2-NEXT: movl $0, 4(%eax)
+; X86SSE2-NEXT: movl $0, (%eax)
+; X86SSE2-NEXT: movl $0, 12(%eax)
+; X86SSE2-NEXT: movl $0, 8(%eax)
+; X86SSE2-NEXT: movl $0, 20(%eax)
+; X86SSE2-NEXT: movl $0, 16(%eax)
+; X86SSE2-NEXT: movl $0, 28(%eax)
+; X86SSE2-NEXT: movl $0, 24(%eax)
+; X86SSE2-NEXT: movl $0, 36(%eax)
+; X86SSE2-NEXT: movl $0, 32(%eax)
+; X86SSE2-NEXT: movl $0, 44(%eax)
+; X86SSE2-NEXT: movl $0, 40(%eax)
+; X86SSE2-NEXT: movl $0, 52(%eax)
+; X86SSE2-NEXT: movl $0, 48(%eax)
+; X86SSE2-NEXT: movl $0, 60(%eax)
+; X86SSE2-NEXT: movl $0, 56(%eax)
+; X86SSE2-NEXT: retl
+;
+; X64AVX-LABEL: tryset:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X64AVX-NEXT: vmovups %ymm0, 32(%rdi)
+; X64AVX-NEXT: vmovups %ymm0, (%rdi)
+; X64AVX-NEXT: vzeroupper
+; X64AVX-NEXT: retq
+;
+; X86AVX-LABEL: tryset:
+; X86AVX: # %bb.0:
+; X86AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X86AVX-NEXT: vmovups %ymm0, 32(%eax)
+; X86AVX-NEXT: vmovups %ymm0, (%eax)
+; X86AVX-NEXT: vzeroupper
+; X86AVX-NEXT: retl
+ tail call void @llvm.memset.p0i8.i64(i8* align 1 %x, i8 0, i64 64, i1 false)
+ ret void
+}
+
+define void @trycpy(i8* nocapture %x, i8* nocapture readonly %y) {
+; X64SSE-LABEL: trycpy:
+; X64SSE: # %bb.0:
+; X64SSE-NEXT: movq 24(%rsi), %rax
+; X64SSE-NEXT: movq %rax, 24(%rdi)
+; X64SSE-NEXT: movq 16(%rsi), %rax
+; X64SSE-NEXT: movq %rax, 16(%rdi)
+; X64SSE-NEXT: movq (%rsi), %rax
+; X64SSE-NEXT: movq 8(%rsi), %rcx
+; X64SSE-NEXT: movq %rcx, 8(%rdi)
+; X64SSE-NEXT: movq %rax, (%rdi)
+; X64SSE-NEXT: retq
+;
+; X86SSE-LABEL: trycpy:
+; X86SSE: # %bb.0:
+; X86SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86SSE-NEXT: movl 28(%ecx), %edx
+; X86SSE-NEXT: movl %edx, 28(%eax)
+; X86SSE-NEXT: movl 24(%ecx), %edx
+; X86SSE-NEXT: movl %edx, 24(%eax)
+; X86SSE-NEXT: movl 20(%ecx), %edx
+; X86SSE-NEXT: movl %edx, 20(%eax)
+; X86SSE-NEXT: movl 16(%ecx), %edx
+; X86SSE-NEXT: movl %edx, 16(%eax)
+; X86SSE-NEXT: movl 12(%ecx), %edx
+; X86SSE-NEXT: movl %edx, 12(%eax)
+; X86SSE-NEXT: movl 8(%ecx), %edx
+; X86SSE-NEXT: movl %edx, 8(%eax)
+; X86SSE-NEXT: movl (%ecx), %edx
+; X86SSE-NEXT: movl 4(%ecx), %ecx
+; X86SSE-NEXT: movl %ecx, 4(%eax)
+; X86SSE-NEXT: movl %edx, (%eax)
+; X86SSE-NEXT: retl
+;
+; X64SSE2-LABEL: trycpy:
+; X64SSE2: # %bb.0:
+; X64SSE2-NEXT: movq 24(%rsi), %rax
+; X64SSE2-NEXT: movq %rax, 24(%rdi)
+; X64SSE2-NEXT: movq 16(%rsi), %rax
+; X64SSE2-NEXT: movq %rax, 16(%rdi)
+; X64SSE2-NEXT: movq (%rsi), %rax
+; X64SSE2-NEXT: movq 8(%rsi), %rcx
+; X64SSE2-NEXT: movq %rcx, 8(%rdi)
+; X64SSE2-NEXT: movq %rax, (%rdi)
+; X64SSE2-NEXT: retq
+;
+; X86SSE2-LABEL: trycpy:
+; X86SSE2: # %bb.0:
+; X86SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86SSE2-NEXT: movsd %xmm0, 24(%eax)
+; X86SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86SSE2-NEXT: movsd %xmm0, 16(%eax)
+; X86SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; X86SSE2-NEXT: movsd %xmm1, 8(%eax)
+; X86SSE2-NEXT: movsd %xmm0, (%eax)
+; X86SSE2-NEXT: retl
+;
+; X64AVX-LABEL: trycpy:
+; X64AVX: # %bb.0:
+; X64AVX-NEXT: vmovups (%rsi), %ymm0
+; X64AVX-NEXT: vmovups %ymm0, (%rdi)
+; X64AVX-NEXT: vzeroupper
+; X64AVX-NEXT: retq
+;
+; X86AVX-LABEL: trycpy:
+; X86AVX: # %bb.0:
+; X86AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86AVX-NEXT: vmovups (%ecx), %ymm0
+; X86AVX-NEXT: vmovups %ymm0, (%eax)
+; X86AVX-NEXT: vzeroupper
+; X86AVX-NEXT: retl
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %x, i8* align 1 %y, i64 32, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #2
+
Modified: llvm/trunk/test/CodeGen/X86/x87.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x87.ll?rev=343689&r1=343688&r2=343689&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x87.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x87.ll Wed Oct 3 07:13:30 2018
@@ -1,9 +1,9 @@
-; RUN: llc < %s -mtriple=i686-- | FileCheck %s -check-prefix=X87
-; RUN: llc < %s -mtriple=x86_64-- -mattr=-sse | FileCheck %s -check-prefix=X87
-; RUN: llc < %s -mtriple=i686-- -mattr=-x87 | FileCheck %s -check-prefix=NOX87
-; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87,-sse | FileCheck %s -check-prefix=NOX87
-; RUN: llc < %s -mtriple=i686-- -mattr=-x87,+sse | FileCheck %s -check-prefix=NOX87
-; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87,-sse2 | FileCheck %s -check-prefix=NOX87
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s -check-prefixes=X8732,X87
+; RUN: llc < %s -mtriple=x86_64-- -mattr=-sse | FileCheck %s -check-prefixes=X8732,X87
+; RUN: llc < %s -mtriple=i686-- -mattr=-x87 | FileCheck %s -check-prefixes=NOX8732,NOX87
+; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87,-sse | FileCheck %s -check-prefixes=NOX8732,NOX87
+; RUN: llc < %s -mtriple=i686-- -mattr=-x87,+sse | FileCheck %s -check-prefixes=NOX8732,NOX87
+; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87,-sse2 | FileCheck %s -check-prefixes=X8732_SSE,NOX87
define void @test(i32 %i, i64 %l, float* %pf, double* %pd, fp128* %pld) nounwind readnone {
; X87-LABEL: test:
@@ -12,18 +12,18 @@ define void @test(i32 %i, i64 %l, float*
; NOX87-NOT: {{ }}f{{.*}}
; X87: fild
-; NOX87: __floatunsisf
+; NOX8732: __floatunsisf
%tmp = uitofp i32 %i to float
-; X87: fild
-; NOX87: __floatdisf
+; X8732: fild
+; NOX8732: __floatdisf
%tmp1 = sitofp i64 %l to float
-; X87: fadd
-; NOX87: __addsf3
+; X8732: fadd
+; NOX8732: __addsf3
%tmp2 = fadd float %tmp, %tmp1
-; X87: fstp
+; X8732: fstp
store float %tmp2, float* %pf
; X87: fild