[llvm] r334802 - [X86] Prevent folding stack reloads with instructions that have an undefined register update.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 14 23:11:37 PDT 2018
Author: ctopper
Date: Thu Jun 14 23:11:36 2018
New Revision: 334802
URL: http://llvm.org/viewvc/llvm-project?rev=334802&view=rev
Log:
[X86] Prevent folding stack reloads with instructions that have an undefined register update.
We want to keep the load unfolded so we can use the same register for both sources to avoid a false dependency.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/break-false-dep.ll
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=334802&r1=334801&r2=334802&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Thu Jun 14 23:11:36 2018
@@ -8916,12 +8916,15 @@ static bool shouldPreventUndefRegUpdateM
if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
!MI.getOperand(1).isReg())
return false;
-
+
+ // Check if the register is explicitly marked as undef.
+ if (MI.getOperand(1).isUndef())
+ return true;
+
+ // Another possibility is that it is defined by an IMPLICIT_DEF pseudo.
MachineRegisterInfo &RegInfo = MF.getRegInfo();
MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg());
- if (VRegDef == nullptr)
- return false;
- return VRegDef->isImplicitDef();
+ return VRegDef && VRegDef->isImplicitDef();
}
Modified: llvm/trunk/test/CodeGen/X86/break-false-dep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/break-false-dep.ll?rev=334802&r1=334801&r2=334802&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/break-false-dep.ll (original)
+++ llvm/trunk/test/CodeGen/X86/break-false-dep.ll Thu Jun 14 23:11:36 2018
@@ -217,7 +217,6 @@ top:
; Make sure we are making a smart choice regarding undef registers and
; hiding the false dependency behind a true dependency
-; TODO: We shouldn't be folding the load here.
define double @truedeps(float %arg) {
top:
tail call void asm sideeffect "", "~{xmm6},~{dirflag},~{fpsr},~{flags}"()
@@ -228,8 +227,8 @@ top:
%tmp1 = fpext float %arg to double
ret double %tmp1
;AVX-LABEL:@truedeps
-;AVX: vxorps [[XMM6:%xmm6]], [[XMM6]], [[XMM6]]
-;AVX: vcvtss2sd {{.*}}, [[XMM6]], {{%xmm[0-9]+}}
+;AVX-NOT: vxorps
+;AVX: vcvtss2sd [[XMM0:%xmm[0-9]+]], [[XMM0]], {{%xmm[0-9]+}}
}
; Make sure we are making a smart choice regarding undef registers and
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll?rev=334802&r1=334801&r2=334802&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll Thu Jun 14 23:11:36 2018
@@ -573,7 +573,7 @@ define i64 @stack_fold_cvtsd2si64_int(<2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
-define double @stack_fold_cvtsi2sd(i32 %a0) {
+define double @stack_fold_cvtsi2sd(i32 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi2sd
;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -581,7 +581,7 @@ define double @stack_fold_cvtsi2sd(i32 %
ret double %2
}
-define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
+define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi2sd_int
;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -590,7 +590,7 @@ define <2 x double> @stack_fold_cvtsi2sd
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
-define double @stack_fold_cvtsi642sd(i64 %a0) {
+define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi642sd
;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -598,7 +598,7 @@ define double @stack_fold_cvtsi642sd(i64
ret double %2
}
-define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
+define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi642sd_int
;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -607,7 +607,7 @@ define <2 x double> @stack_fold_cvtsi642
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
-define float @stack_fold_cvtsi2ss(i32 %a0) {
+define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi2ss
;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -615,7 +615,7 @@ define float @stack_fold_cvtsi2ss(i32 %a
ret float %2
}
-define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
+define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi2ss_int
;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -624,7 +624,7 @@ define <4 x float> @stack_fold_cvtsi2ss_
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
-define float @stack_fold_cvtsi642ss(i64 %a0) {
+define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi642ss
;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -632,7 +632,7 @@ define float @stack_fold_cvtsi642ss(i64
ret float %2
}
-define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
+define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi642ss_int
;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -1653,7 +1653,7 @@ define <8 x float> @stack_fold_sqrtps_ym
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
-define double @stack_fold_sqrtsd(double %a0) {
+define double @stack_fold_sqrtsd(double %a0) optsize {
;CHECK-LABEL: stack_fold_sqrtsd
;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
@@ -1664,7 +1664,7 @@ declare double @llvm.sqrt.f64(double) no
; TODO stack_fold_sqrtsd_int
-define float @stack_fold_sqrtss(float %a0) {
+define float @stack_fold_sqrtss(float %a0) optsize {
;CHECK-LABEL: stack_fold_sqrtss
;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
More information about the llvm-commits mailing list