[llvm] r354193 - [X86] Don't prevent load folding for cvtsi2ss/cvtsi2sd based on hasPartialRegUpdate.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 15 19:34:54 PST 2019
Author: ctopper
Date: Fri Feb 15 19:34:54 2019
New Revision: 354193
URL: http://llvm.org/viewvc/llvm-project?rev=354193&view=rev
Log:
[X86] Don't prevent load folding for cvtsi2ss/cvtsi2sd based on hasPartialRegUpdate.
Preventing the load fold won't fix the partial register update since the
input we can fold is a GPR. So it will do nothing to prevent a false dependency
on an XMM register.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll
llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll
llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion.ll
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll
llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=354193&r1=354192&r2=354193&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Fri Feb 15 19:34:54 2019
@@ -4239,7 +4239,8 @@ bool X86InstrInfo::expandPostRAPseudo(Ma
/// FIXME: This should be turned into a TSFlags.
///
static bool hasPartialRegUpdate(unsigned Opcode,
- const X86Subtarget &Subtarget) {
+ const X86Subtarget &Subtarget,
+ bool ForLoadFold = false) {
switch (Opcode) {
case X86::CVTSI2SSrr:
case X86::CVTSI2SSrm:
@@ -4249,6 +4250,9 @@ static bool hasPartialRegUpdate(unsigned
case X86::CVTSI2SDrm:
case X86::CVTSI642SDrr:
case X86::CVTSI642SDrm:
+ // Load folding won't effect the undef register update since the input is
+ // a GPR.
+ return !ForLoadFold;
case X86::CVTSD2SSrr:
case X86::CVTSD2SSrm:
case X86::CVTSS2SDrr:
@@ -4325,7 +4329,7 @@ unsigned X86InstrInfo::getPartialRegUpda
// Return true for any instruction the copies the high bits of the first source
// operand into the unused high bits of the destination operand.
-static bool hasUndefRegUpdate(unsigned Opcode) {
+static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) {
switch (Opcode) {
case X86::VCVTSI2SSrr:
case X86::VCVTSI2SSrm:
@@ -4343,38 +4347,6 @@ static bool hasUndefRegUpdate(unsigned O
case X86::VCVTSI642SDrm:
case X86::VCVTSI642SDrr_Int:
case X86::VCVTSI642SDrm_Int:
- case X86::VCVTSD2SSrr:
- case X86::VCVTSD2SSrm:
- case X86::VCVTSD2SSrr_Int:
- case X86::VCVTSD2SSrm_Int:
- case X86::VCVTSS2SDrr:
- case X86::VCVTSS2SDrm:
- case X86::VCVTSS2SDrr_Int:
- case X86::VCVTSS2SDrm_Int:
- case X86::VRCPSSr:
- case X86::VRCPSSr_Int:
- case X86::VRCPSSm:
- case X86::VRCPSSm_Int:
- case X86::VROUNDSDr:
- case X86::VROUNDSDm:
- case X86::VROUNDSDr_Int:
- case X86::VROUNDSDm_Int:
- case X86::VROUNDSSr:
- case X86::VROUNDSSm:
- case X86::VROUNDSSr_Int:
- case X86::VROUNDSSm_Int:
- case X86::VRSQRTSSr:
- case X86::VRSQRTSSr_Int:
- case X86::VRSQRTSSm:
- case X86::VRSQRTSSm_Int:
- case X86::VSQRTSSr:
- case X86::VSQRTSSr_Int:
- case X86::VSQRTSSm:
- case X86::VSQRTSSm_Int:
- case X86::VSQRTSDr:
- case X86::VSQRTSDr_Int:
- case X86::VSQRTSDm:
- case X86::VSQRTSDm_Int:
// AVX-512
case X86::VCVTSI2SSZrr:
case X86::VCVTSI2SSZrm:
@@ -4415,6 +4387,42 @@ static bool hasUndefRegUpdate(unsigned O
case X86::VCVTUSI642SDZrr_Int:
case X86::VCVTUSI642SDZrrb_Int:
case X86::VCVTUSI642SDZrm_Int:
+ // Load folding won't effect the undef register update since the input is
+ // a GPR.
+ return !ForLoadFold;
+ case X86::VCVTSD2SSrr:
+ case X86::VCVTSD2SSrm:
+ case X86::VCVTSD2SSrr_Int:
+ case X86::VCVTSD2SSrm_Int:
+ case X86::VCVTSS2SDrr:
+ case X86::VCVTSS2SDrm:
+ case X86::VCVTSS2SDrr_Int:
+ case X86::VCVTSS2SDrm_Int:
+ case X86::VRCPSSr:
+ case X86::VRCPSSr_Int:
+ case X86::VRCPSSm:
+ case X86::VRCPSSm_Int:
+ case X86::VROUNDSDr:
+ case X86::VROUNDSDm:
+ case X86::VROUNDSDr_Int:
+ case X86::VROUNDSDm_Int:
+ case X86::VROUNDSSr:
+ case X86::VROUNDSSm:
+ case X86::VROUNDSSr_Int:
+ case X86::VROUNDSSm_Int:
+ case X86::VRSQRTSSr:
+ case X86::VRSQRTSSr_Int:
+ case X86::VRSQRTSSm:
+ case X86::VRSQRTSSm_Int:
+ case X86::VSQRTSSr:
+ case X86::VSQRTSSr_Int:
+ case X86::VSQRTSSm:
+ case X86::VSQRTSSm_Int:
+ case X86::VSQRTSDr:
+ case X86::VSQRTSDr_Int:
+ case X86::VSQRTSDm:
+ case X86::VSQRTSDm_Int:
+ // AVX-512
case X86::VCVTSD2SSZrr:
case X86::VCVTSD2SSZrr_Int:
case X86::VCVTSD2SSZrrb_Int:
@@ -4735,8 +4743,9 @@ MachineInstr *X86InstrInfo::foldMemoryOp
return nullptr;
}
-static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI) {
- if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
+static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
+ MachineInstr &MI) {
+ if (!hasUndefRegUpdate(MI.getOpcode(), /*ForLoadFold*/true) ||
!MI.getOperand(1).isReg())
return false;
@@ -4772,7 +4781,7 @@ MachineInstr *X86InstrInfo::foldMemoryOp
// Avoid partial and undef register update stalls unless optimizing for size.
if (!MF.getFunction().optForSize() &&
- (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+ (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
shouldPreventUndefRegUpdateMemFold(MF, MI)))
return nullptr;
@@ -4940,7 +4949,7 @@ X86InstrInfo::foldMemoryOperandImpl(Mach
// Avoid partial and undef register update stalls unless optimizing for size.
if (!MF.getFunction().optForSize() &&
- (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+ (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
shouldPreventUndefRegUpdateMemFold(MF, MI)))
return nullptr;
@@ -5140,7 +5149,7 @@ MachineInstr *X86InstrInfo::foldMemoryOp
// Avoid partial and undef register update stalls unless optimizing for size.
if (!MF.getFunction().optForSize() &&
- (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+ (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
shouldPreventUndefRegUpdateMemFold(MF, MI)))
return nullptr;
Modified: llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll?rev=354193&r1=354192&r2=354193&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll Fri Feb 15 19:34:54 2019
@@ -22,14 +22,12 @@ entry:
define double @long_to_double_rm(i64* %a) {
; SSE2-LABEL: long_to_double_rm:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movq (%rdi), %rax
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
+; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: long_to_double_rm:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: movq (%rdi), %rax
-; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%0 = load i64, i64* %a
@@ -71,14 +69,12 @@ entry:
define float @long_to_float_rm(i64* %a) {
; SSE2-LABEL: long_to_float_rm:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movq (%rdi), %rax
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: long_to_float_rm:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: movq (%rdi), %rax
-; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%0 = load i64, i64* %a
Modified: llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll?rev=354193&r1=354192&r2=354193&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll Fri Feb 15 19:34:54 2019
@@ -27,8 +27,7 @@ define double @int_to_double_rr(i32 %a)
; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp
; SSE2_X86-NEXT: andl $-8, %esp
; SSE2_X86-NEXT: subl $8, %esp
-; SSE2_X86-NEXT: movl 8(%ebp), %eax
-; SSE2_X86-NEXT: cvtsi2sdl %eax, %xmm0
+; SSE2_X86-NEXT: cvtsi2sdl 8(%ebp), %xmm0
; SSE2_X86-NEXT: movsd %xmm0, (%esp)
; SSE2_X86-NEXT: fldl (%esp)
; SSE2_X86-NEXT: movl %ebp, %esp
@@ -45,8 +44,7 @@ define double @int_to_double_rr(i32 %a)
; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
; AVX_X86-NEXT: andl $-8, %esp
; AVX_X86-NEXT: subl $8, %esp
-; AVX_X86-NEXT: movl 8(%ebp), %eax
-; AVX_X86-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0
; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
; AVX_X86-NEXT: fldl (%esp)
; AVX_X86-NEXT: movl %ebp, %esp
@@ -61,14 +59,12 @@ entry:
define double @int_to_double_rm(i32* %a) {
; SSE2-LABEL: int_to_double_rm:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movl (%rdi), %eax
-; SSE2-NEXT: cvtsi2sdl %eax, %xmm0
+; SSE2-NEXT: cvtsi2sdl (%rdi), %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: int_to_double_rm:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; SSE2_X86-LABEL: int_to_double_rm:
@@ -179,8 +175,7 @@ define float @int_to_float_rr(i32 %a) {
; SSE2_X86: # %bb.0: # %entry
; SSE2_X86-NEXT: pushl %eax
; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
-; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE2_X86-NEXT: cvtsi2ssl %eax, %xmm0
+; SSE2_X86-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; SSE2_X86-NEXT: movss %xmm0, (%esp)
; SSE2_X86-NEXT: flds (%esp)
; SSE2_X86-NEXT: popl %eax
@@ -191,8 +186,7 @@ define float @int_to_float_rr(i32 %a) {
; AVX_X86: # %bb.0: # %entry
; AVX_X86-NEXT: pushl %eax
; AVX_X86-NEXT: .cfi_def_cfa_offset 8
-; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX_X86-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX_X86-NEXT: vmovss %xmm0, (%esp)
; AVX_X86-NEXT: flds (%esp)
; AVX_X86-NEXT: popl %eax
@@ -206,14 +200,12 @@ entry:
define float @int_to_float_rm(i32* %a) {
; SSE2-LABEL: int_to_float_rm:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movl (%rdi), %eax
-; SSE2-NEXT: cvtsi2ssl %eax, %xmm0
+; SSE2-NEXT: cvtsi2ssl (%rdi), %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: int_to_float_rm:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; SSE2_X86-LABEL: int_to_float_rm:
Modified: llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll?rev=354193&r1=354192&r2=354193&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion-x86-64.ll Fri Feb 15 19:34:54 2019
@@ -15,8 +15,7 @@ entry:
define double @long_to_double_rm(i64* %a) {
; ALL-LABEL: long_to_double_rm:
; ALL: # %bb.0: # %entry
-; ALL-NEXT: movq (%rdi), %rax
-; ALL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
+; ALL-NEXT: vcvtusi2sdq (%rdi), %xmm0, %xmm0
; ALL-NEXT: retq
entry:
%0 = load i64, i64* %a
@@ -48,8 +47,7 @@ entry:
define float @long_to_float_rm(i64* %a) {
; ALL-LABEL: long_to_float_rm:
; ALL: # %bb.0: # %entry
-; ALL-NEXT: movq (%rdi), %rax
-; ALL-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm0
+; ALL-NEXT: vcvtusi2ssq (%rdi), %xmm0, %xmm0
; ALL-NEXT: retq
entry:
%0 = load i64, i64* %a
Modified: llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion.ll?rev=354193&r1=354192&r2=354193&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-uint-float-conversion.ll Fri Feb 15 19:34:54 2019
@@ -18,8 +18,7 @@ define double @int_to_double_rr(i32 %a)
; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
; AVX_X86-NEXT: andl $-8, %esp
; AVX_X86-NEXT: subl $8, %esp
-; AVX_X86-NEXT: movl 8(%ebp), %eax
-; AVX_X86-NEXT: vcvtusi2sdl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT: vcvtusi2sdl 8(%ebp), %xmm0, %xmm0
; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
; AVX_X86-NEXT: fldl (%esp)
; AVX_X86-NEXT: movl %ebp, %esp
@@ -34,8 +33,7 @@ entry:
define double @int_to_double_rm(i32* %a) {
; AVX-LABEL: int_to_double_rm:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: vcvtusi2sdl %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvtusi2sdl (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX_X86-LABEL: int_to_double_rm:
@@ -100,8 +98,7 @@ define float @int_to_float_rr(i32 %a) {
; AVX_X86: # %bb.0: # %entry
; AVX_X86-NEXT: pushl %eax
; AVX_X86-NEXT: .cfi_def_cfa_offset 8
-; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX_X86-NEXT: vcvtusi2ssl %eax, %xmm0, %xmm0
+; AVX_X86-NEXT: vcvtusi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX_X86-NEXT: vmovss %xmm0, (%esp)
; AVX_X86-NEXT: flds (%esp)
; AVX_X86-NEXT: popl %eax
@@ -115,8 +112,7 @@ entry:
define float @int_to_float_rm(i32* %a) {
; AVX-LABEL: int_to_float_rm:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: vcvtusi2ssl %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX_X86-LABEL: int_to_float_rm:
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll?rev=354193&r1=354192&r2=354193&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll Fri Feb 15 19:34:54 2019
@@ -577,8 +577,7 @@ define i64 @stack_fold_cvtsd2si64_int(<2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define double @stack_fold_cvtsi2sd(i32 %a0) optsize {
+define double @stack_fold_cvtsi2sd(i32 %a0) {
;CHECK-LABEL: stack_fold_cvtsi2sd
;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -586,8 +585,7 @@ define double @stack_fold_cvtsi2sd(i32 %
ret double %2
}
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) optsize {
+define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
;CHECK-LABEL: stack_fold_cvtsi2sd_int
;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -596,8 +594,7 @@ define <2 x double> @stack_fold_cvtsi2sd
ret <2 x double> %3
}
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
+define double @stack_fold_cvtsi642sd(i64 %a0) {
;CHECK-LABEL: stack_fold_cvtsi642sd
;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -605,8 +602,7 @@ define double @stack_fold_cvtsi642sd(i64
ret double %2
}
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) optsize {
+define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
;CHECK-LABEL: stack_fold_cvtsi642sd_int
;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -615,8 +611,7 @@ define <2 x double> @stack_fold_cvtsi642
ret <2 x double> %3
}
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
+define float @stack_fold_cvtsi2ss(i32 %a0) {
;CHECK-LABEL: stack_fold_cvtsi2ss
;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -624,8 +619,7 @@ define float @stack_fold_cvtsi2ss(i32 %a
ret float %2
}
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) optsize {
+define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
;CHECK-LABEL: stack_fold_cvtsi2ss_int
;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -634,8 +628,7 @@ define <4 x float> @stack_fold_cvtsi2ss_
ret <4 x float> %3
}
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
+define float @stack_fold_cvtsi642ss(i64 %a0) {
;CHECK-LABEL: stack_fold_cvtsi642ss
;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -643,8 +636,7 @@ define float @stack_fold_cvtsi642ss(i64
ret float %2
}
-; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
-define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) optsize {
+define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
;CHECK-LABEL: stack_fold_cvtsi642ss_int
;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll?rev=354193&r1=354192&r2=354193&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-sse42.ll Fri Feb 15 19:34:54 2019
@@ -357,7 +357,7 @@ define <4 x float> @stack_fold_cvtsd2ss_
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
-define double @stack_fold_cvtsi2sd(i32 %a0) minsize {
+define double @stack_fold_cvtsi2sd(i32 %a0) {
;CHECK-LABEL: stack_fold_cvtsi2sd
;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -374,7 +374,7 @@ define <2 x double> @stack_fold_cvtsi2sd
ret <2 x double> %3
}
-define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
+define double @stack_fold_cvtsi642sd(i64 %a0) {
;CHECK-LABEL: stack_fold_cvtsi642sd
;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -391,7 +391,7 @@ define <2 x double> @stack_fold_cvtsi642
ret <2 x double> %3
}
-define float @stack_fold_cvtsi2ss(i32 %a0) minsize {
+define float @stack_fold_cvtsi2ss(i32 %a0) {
;CHECK-LABEL: stack_fold_cvtsi2ss
;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -408,7 +408,7 @@ define <4 x float> @stack_fold_cvtsi2ss_
ret <4 x float> %3
}
-define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
+define float @stack_fold_cvtsi642ss(i64 %a0) {
;CHECK-LABEL: stack_fold_cvtsi642ss
;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
More information about the llvm-commits
mailing list