[llvm] r321950 - [X86] Remove cvtps2ph xmm->xmm from store folding tables. Add the evex versions of cvtps2ph to the store folding tables.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 6 22:24:23 PST 2018
Author: ctopper
Date: Sat Jan 6 22:24:23 2018
New Revision: 321950
URL: http://llvm.org/viewvc/llvm-project?rev=321950&view=rev
Log:
[X86] Remove cvtps2ph xmm->xmm from store folding tables. Add the evex versions of cvtps2ph to the store folding tables.
The memory form of the xmm->xmm version only writes 64-bits. If we use it in the folding tables and its get used for a stack spill, only half the slot will be written. Then a reload may read all 128-bits which will pull in garbage. But without the spill the upper bits of the register would have been zero. By not folding we would preserve the zeros.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=321950&r1=321949&r2=321950&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sat Jan 6 22:24:23 2018
@@ -540,8 +540,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE },
// F16C foldable instructions
- { X86::VCVTPS2PHrr, X86::VCVTPS2PHmr, TB_FOLDED_STORE },
- { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE }
+ { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE },
+ { X86::VCVTPS2PHZ256rr, X86::VCVTPS2PHZ256mr, TB_FOLDED_STORE },
+ { X86::VCVTPS2PHZrr, X86::VCVTPS2PHZmr, TB_FOLDED_STORE },
};
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) {
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll?rev=321950&r1=321949&r2=321950&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx1.ll Sat Jan 6 22:24:23 2018
@@ -535,15 +535,6 @@ define <4 x double> @stack_fold_cvtps2pd
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
-define <8 x i16> @stack_fold_cvtps2ph(<4 x float> %a0) {
- ;CHECK-LABEL: stack_fold_cvtps2ph
- ;CHECK: vcvtps2ph $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
- %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
- %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- ret <8 x i16> %1
-}
-declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) nounwind readonly
-
define <8 x i16> @stack_fold_cvtps2ph_ymm(<8 x float> %a0) {
;CHECK-LABEL: stack_fold_cvtps2ph_ymm
;CHECK: vcvtps2ph $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll?rev=321950&r1=321949&r2=321950&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll Sat Jan 6 22:24:23 2018
@@ -208,6 +208,15 @@ define <8 x float> @stack_fold_cvtpd2ps(
ret <8 x float> %2
}
+define <16 x i16> @stack_fold_cvtps2ph(<16 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtps2ph
+ ;CHECK: vcvtps2ph $0, {{%zmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 32-byte Folded Spill
+ %1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 0, <16 x i16> undef, i16 -1)
+ %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ ret <16 x i16> %1
+}
+declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
+
define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_insertps
;CHECK: vinsertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
More information about the llvm-commits
mailing list