[llvm] r334848 - [X86] Prevent folding stack reloads into instructions in hasUndefRegUpdate.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 15 10:56:17 PDT 2018
Author: ctopper
Date: Fri Jun 15 10:56:17 2018
New Revision: 334848
URL: http://llvm.org/viewvc/llvm-project?rev=334848&view=rev
Log:
[X86] Prevent folding stack reloads into instructions in hasUndefRegUpdate.
An earlier commit prevented folds from the peephole pass by checking for IMPLICIT_DEF. But later in the pipeline IMPLICIT_DEF just becomes an Undef flag on the input register, so we need to check for that case too.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/break-false-dep.ll
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=334848&r1=334847&r2=334848&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Fri Jun 15 10:56:17 2018
@@ -8916,12 +8916,18 @@ static bool shouldPreventUndefRegUpdateM
if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
!MI.getOperand(1).isReg())
return false;
-
+
+ // There are two cases we need to handle depending on where in the pipeline
+ // the folding attempt is being made.
+ // -Register has the undef flag set.
+ // -Register is produced by the IMPLICIT_DEF instruction.
+
+ if (MI.getOperand(1).isUndef())
+ return true;
+
MachineRegisterInfo &RegInfo = MF.getRegInfo();
MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg());
- if (VRegDef == nullptr)
- return false;
- return VRegDef->isImplicitDef();
+ return VRegDef && VRegDef->isImplicitDef();
}
Modified: llvm/trunk/test/CodeGen/X86/break-false-dep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/break-false-dep.ll?rev=334848&r1=334847&r2=334848&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/break-false-dep.ll (original)
+++ llvm/trunk/test/CodeGen/X86/break-false-dep.ll Fri Jun 15 10:56:17 2018
@@ -217,7 +217,6 @@ top:
; Make sure we are making a smart choice regarding undef registers and
; hiding the false dependency behind a true dependency
-; TODO: We shouldn't be folding the load here.
define double @truedeps(float %arg) {
top:
tail call void asm sideeffect "", "~{xmm6},~{dirflag},~{fpsr},~{flags}"()
@@ -228,8 +227,8 @@ top:
%tmp1 = fpext float %arg to double
ret double %tmp1
;AVX-LABEL:@truedeps
-;AVX: vxorps [[XMM6:%xmm6]], [[XMM6]], [[XMM6]]
-;AVX: vcvtss2sd {{.*}}, [[XMM6]], {{%xmm[0-9]+}}
+;AVX-NOT: vxorps
+;AVX: vcvtss2sd [[XMM0:%xmm[0-9]+]], [[XMM0]], {{%xmm[0-9]+}}
}
; Make sure we are making a smart choice regarding undef registers and
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll?rev=334848&r1=334847&r2=334848&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll Fri Jun 15 10:56:17 2018
@@ -573,7 +573,8 @@ define i64 @stack_fold_cvtsd2si64_int(<2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
-define double @stack_fold_cvtsi2sd(i32 %a0) {
+; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
+define double @stack_fold_cvtsi2sd(i32 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi2sd
;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -581,7 +582,8 @@ define double @stack_fold_cvtsi2sd(i32 %
ret double %2
}
-define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
+; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
+define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi2sd_int
;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -590,7 +592,8 @@ define <2 x double> @stack_fold_cvtsi2sd
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
-define double @stack_fold_cvtsi642sd(i64 %a0) {
+; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
+define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi642sd
;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -598,7 +601,8 @@ define double @stack_fold_cvtsi642sd(i64
ret double %2
}
-define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
+; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
+define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi642sd_int
;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -607,7 +611,8 @@ define <2 x double> @stack_fold_cvtsi642
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
-define float @stack_fold_cvtsi2ss(i32 %a0) {
+; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
+define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi2ss
;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -615,7 +620,8 @@ define float @stack_fold_cvtsi2ss(i32 %a
ret float %2
}
-define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
+; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
+define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi2ss_int
;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -624,7 +630,8 @@ define <4 x float> @stack_fold_cvtsi2ss_
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
-define float @stack_fold_cvtsi642ss(i64 %a0) {
+; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
+define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi642ss
;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -632,7 +639,8 @@ define float @stack_fold_cvtsi642ss(i64
ret float %2
}
-define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
+; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
+define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) optsize {
;CHECK-LABEL: stack_fold_cvtsi642ss_int
;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -1653,7 +1661,7 @@ define <8 x float> @stack_fold_sqrtps_ym
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
-define double @stack_fold_sqrtsd(double %a0) {
+define double @stack_fold_sqrtsd(double %a0) optsize {
;CHECK-LABEL: stack_fold_sqrtsd
;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
@@ -1664,7 +1672,7 @@ declare double @llvm.sqrt.f64(double) no
; TODO stack_fold_sqrtsd_int
-define float @stack_fold_sqrtss(float %a0) {
+define float @stack_fold_sqrtss(float %a0) optsize {
;CHECK-LABEL: stack_fold_sqrtss
;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
More information about the llvm-commits
mailing list