[llvm] r288481 - [AVX-512] Add masked VINSERTF/VINSERTI instructions to load folding tables.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 1 22:24:39 PST 2016
Author: ctopper
Date: Fri Dec 2 00:24:38 2016
New Revision: 288481
URL: http://llvm.org/viewvc/llvm-project?rev=288481&view=rev
Log:
[AVX-512] Add masked VINSERTF/VINSERTI instructions to load folding tables.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=288481&r1=288480&r2=288481&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Fri Dec 2 00:24:38 2016
@@ -2254,7 +2254,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmi, 0 },
{ X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmi, 0 },
- // AVX-512 masked arithmetic instructions
+ // AVX-512 masked instructions
{ X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
{ X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
{ X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 },
@@ -2263,6 +2263,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 },
{ X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
{ X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
+ { X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 },
+ { X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 },
+ { X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 },
+ { X86::VINSERTF64x4Zrrkz, X86::VINSERTF64x4Zrmkz, 0 },
+ { X86::VINSERTI32x4Zrrkz, X86::VINSERTI32x4Zrmkz, 0 },
+ { X86::VINSERTI32x8Zrrkz, X86::VINSERTI32x8Zrmkz, 0 },
+ { X86::VINSERTI64x2Zrrkz, X86::VINSERTI64x2Zrmkz, 0 },
+ { X86::VINSERTI64x4Zrrkz, X86::VINSERTI64x4Zrmkz, 0 },
{ X86::VMAXCPDZrrkz, X86::VMAXCPDZrmkz, 0 },
{ X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 },
{ X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
@@ -2325,6 +2333,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VANDPSZ256rrkz, X86::VANDPSZ256rmkz, 0 },
{ X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
{ X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
+ { X86::VINSERTF32x4Z256rrkz, X86::VINSERTF32x4Z256rmkz, 0 },
+ { X86::VINSERTF64x2Z256rrkz, X86::VINSERTF64x2Z256rmkz, 0 },
+ { X86::VINSERTI32x4Z256rrkz, X86::VINSERTI32x4Z256rmkz, 0 },
+ { X86::VINSERTI64x2Z256rrkz, X86::VINSERTI64x2Z256rmkz, 0 },
{ X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmkz, 0 },
{ X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0 },
{ X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
@@ -2516,6 +2528,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VANDPSZrrk, X86::VANDPSZrmk, 0 },
{ X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
{ X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
+ { X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 },
+ { X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 },
+ { X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 },
+ { X86::VINSERTF64x4Zrrk, X86::VINSERTF64x4Zrmk, 0 },
+ { X86::VINSERTI32x4Zrrk, X86::VINSERTI32x4Zrmk, 0 },
+ { X86::VINSERTI32x8Zrrk, X86::VINSERTI32x8Zrmk, 0 },
+ { X86::VINSERTI64x2Zrrk, X86::VINSERTI64x2Zrmk, 0 },
+ { X86::VINSERTI64x4Zrrk, X86::VINSERTI64x4Zrmk, 0 },
{ X86::VMAXCPDZrrk, X86::VMAXCPDZrmk, 0 },
{ X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 },
{ X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
@@ -2581,6 +2601,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VANDPSZ256rrk, X86::VANDPSZ256rmk, 0 },
{ X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
{ X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
+ { X86::VINSERTF32x4Z256rrk,X86::VINSERTF32x4Z256rmk, 0 },
+ { X86::VINSERTF64x2Z256rrk,X86::VINSERTF64x2Z256rmk, 0 },
+ { X86::VINSERTI32x4Z256rrk,X86::VINSERTI32x4Z256rmk, 0 },
+ { X86::VINSERTI64x2Z256rrk,X86::VINSERTI64x2Z256rmk, 0 },
{ X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmk, 0 },
{ X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0 },
{ X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll?rev=288481&r1=288480&r2=288481&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll Fri Dec 2 00:24:38 2016
@@ -496,5 +496,25 @@ define <8 x double> @stack_fold_insertf6
ret <8 x double> %2
}
+define <8 x double> @stack_fold_insertf64x4_mask(<8 x double> %passthru, <4 x double> %a0, <4 x double> %a1, i8 %mask) {
+ ;CHECK-LABEL: stack_fold_insertf64x4_mask
+ ;CHECK: vinsertf64x4 $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = bitcast i8 %mask to <8 x i1>
+ %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %passthru
+ ret <8 x double> %4
+}
+
+define <8 x double> @stack_fold_insertf64x4_maskz(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
+ ;CHECK-LABEL: stack_fold_insertf64x4_maskz
+ ;CHECK: vinsertf64x4 $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = bitcast i8 %mask to <8 x i1>
+ %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
+ ret <8 x double> %4
+}
+
attributes #0 = { "unsafe-fp-math"="false" }
attributes #1 = { "unsafe-fp-math"="true" }
More information about the llvm-commits
mailing list