[llvm] r277960 - [AVX-512] Add EVEX encoded floating point MAX/MIN instructions to the load folding tables.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 7 10:14:06 PDT 2016
Author: ctopper
Date: Sun Aug 7 12:14:05 2016
New Revision: 277960
URL: http://llvm.org/viewvc/llvm-project?rev=277960&view=rev
Log:
[AVX-512] Add EVEX encoded floating point MAX/MIN instructions to the load folding tables.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=277960&r1=277959&r2=277960&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Aug 7 12:14:05 2016
@@ -1720,10 +1720,26 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, 0 },
{ X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
{ X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, 0 },
- { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
- { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
- { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
{ X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
+ { X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 },
+ { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
+ { X86::VMAXCPSZrr, X86::VMAXCPSZrm, 0 },
+ { X86::VMAXSDZrr, X86::VMAXSDZrm, 0 },
+ { X86::VMAXCSDZrr, X86::VMAXCSDZrm, 0 },
+ { X86::VMAXSDZrr_Int, X86::VMAXSDZrm_Int, 0 },
+ { X86::VMAXSSZrr, X86::VMAXSSZrm, 0 },
+ { X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0 },
+ { X86::VMAXSSZrr_Int, X86::VMAXSSZrm_Int, 0 },
+ { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
+ { X86::VMINCPDZrr, X86::VMINCPDZrm, 0 },
+ { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
+ { X86::VMINCPSZrr, X86::VMINCPSZrm, 0 },
+ { X86::VMINSDZrr, X86::VMINSDZrm, 0 },
+ { X86::VMINCSDZrr, X86::VMINCSDZrm, 0 },
+ { X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, 0 },
+ { X86::VMINSSZrr, X86::VMINSSZrm, 0 },
+ { X86::VMINCSSZrr, X86::VMINCSSZrm, 0 },
+ { X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, 0 },
{ X86::VPADDDZrr, X86::VPADDDZrm, 0 },
{ X86::VPADDQZrr, X86::VPADDQZrm, 0 },
{ X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
@@ -1798,6 +1814,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VXORPDZ256rr, X86::VXORPDZ256rm, 0 },
{ X86::VXORPSZ128rr, X86::VXORPSZ128rm, 0 },
{ X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 },
+ { X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0 },
+ { X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0 },
+ { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0 },
+ { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0 },
+ { X86::VMAXPSZ128rr, X86::VMAXPSZ128rm, 0 },
+ { X86::VMAXPSZ256rr, X86::VMAXPSZ256rm, 0 },
+ { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rm, 0 },
+ { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rm, 0 },
+ { X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0 },
+ { X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 },
+ { X86::VMINCPDZ128rr, X86::VMINCPDZ128rm, 0 },
+ { X86::VMINCPDZ256rr, X86::VMINCPDZ256rm, 0 },
+ { X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 },
+ { X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 },
+ { X86::VMINCPSZ128rr, X86::VMINCPSZ128rm, 0 },
+ { X86::VMINCPSZ256rr, X86::VMINCPSZ256rm, 0 },
// AES foldable instructions
{ X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll?rev=277960&r1=277959&r2=277960&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll Sun Aug 7 12:14:05 2016
@@ -226,6 +226,74 @@ define <4 x float> @stack_fold_insertps(
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
+define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
+ ;CHECK-LABEL: stack_fold_maxpd
+ ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
+ ;CHECK-LABEL: stack_fold_maxpd_commutable
+ ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+
+define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 {
+ ;CHECK-LABEL: stack_fold_maxpd_ymm
+ ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+define <4 x double> @stack_fold_maxpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) #1 {
+ ;CHECK-LABEL: stack_fold_maxpd_ymm_commutable
+ ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
+ ret <4 x double> %2
+}
+
+define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
+ ;CHECK-LABEL: stack_fold_maxps
+ ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
+ ;CHECK-LABEL: stack_fold_maxps_commutable
+ ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
+ ;CHECK-LABEL: stack_fold_maxps_ymm
+ ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+define <8 x float> @stack_fold_maxps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 {
+ ;CHECK-LABEL: stack_fold_maxps_ymm_commutable
+ ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
+ ret <8 x float> %2
+}
+
define double @stack_fold_mulsd(double %a0, double %a1) {
;CHECK-LABEL: stack_fold_mulsd
;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
@@ -437,3 +505,6 @@ define <8 x float> @stack_fold_xorps_ymm
%6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
ret <8 x float> %6
}
+
+attributes #0 = { "unsafe-fp-math"="false" }
+attributes #1 = { "unsafe-fp-math"="true" }
More information about the llvm-commits
mailing list