[llvm] r287995 - [AVX-512] Add masked EVEX vpmovzx/sx instructions to load folding tables.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 27 00:55:32 PST 2016

Author: ctopper
Date: Sun Nov 27 02:55:31 2016
New Revision: 287995

URL: http://llvm.org/viewvc/llvm-project?rev=287995&view=rev

Log:
[AVX-512] Add masked EVEX vpmovzx/sx instructions to load folding tables.
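
Entries in these memory-fold tables let the load-folding logic (e.g.
X86InstrInfo::foldMemoryOperand) rewrite the register form of a masked
extend into its memory form when the source operand has been spilled to a
stack slot, instead of emitting a separate reload. A rough sketch of the
rewrite this enables; the registers, stack offset, and mask register here
are hypothetical, chosen only for illustration:

    # before folding: explicit reload, then masked sign-extend
    vmovdqa 16(%rsp), %xmm1
    vpmovsxwq %xmm1, %zmm0 {%k1} {z}

    # after folding: the reload is folded into the extend itself
    vpmovsxwq 16(%rsp), %zmm0 {%k1} {z}

The zero-masking (kz) forms go in the two-operand table; the merge-masking
(k) forms go in the later table because the pass-through operand is tied.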
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=287995&r1=287994&r2=287995&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Nov 27 02:55:31 2016
@@ -2028,6 +2028,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VXORPSZ128rr, X86::VXORPSZ128rm, 0 },
{ X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 },
+ // AVX-512 masked foldable instructions
+ { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 },
+ { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, 0 },
+ { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 },
+ { X86::VPMOVSXDQZrrkz, X86::VPMOVSXDQZrmkz, 0 },
+ { X86::VPMOVSXWDZrrkz, X86::VPMOVSXWDZrmkz, 0 },
+ { X86::VPMOVSXWQZrrkz, X86::VPMOVSXWQZrmkz, 0 },
+ { X86::VPMOVZXBDZrrkz, X86::VPMOVZXBDZrmkz, 0 },
+ { X86::VPMOVZXBQZrrkz, X86::VPMOVZXBQZrmkz, 0 },
+ { X86::VPMOVZXBWZrrkz, X86::VPMOVZXBWZrmkz, 0 },
+ { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 },
+ { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 },
+ { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 },
+
+ // AVX-512VL 256-bit masked foldable instructions
+ { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, 0 },
+ { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, 0 },
+ { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 },
+ { X86::VPMOVSXDQZ256rrkz, X86::VPMOVSXDQZ256rmkz, 0 },
+ { X86::VPMOVSXWDZ256rrkz, X86::VPMOVSXWDZ256rmkz, 0 },
+ { X86::VPMOVSXWQZ256rrkz, X86::VPMOVSXWQZ256rmkz, 0 },
+ { X86::VPMOVZXBDZ256rrkz, X86::VPMOVZXBDZ256rmkz, 0 },
+ { X86::VPMOVZXBQZ256rrkz, X86::VPMOVZXBQZ256rmkz, 0 },
+ { X86::VPMOVZXBWZ256rrkz, X86::VPMOVZXBWZ256rmkz, 0 },
+ { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz, 0 },
+ { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz, 0 },
+ { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz, 0 },
+
+ // AVX-512VL 128-bit masked foldable instructions
+ { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, 0 },
+ { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, 0 },
+ { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, 0 },
+ { X86::VPMOVSXDQZ128rrkz, X86::VPMOVSXDQZ128rmkz, 0 },
+ { X86::VPMOVSXWDZ128rrkz, X86::VPMOVSXWDZ128rmkz, 0 },
+ { X86::VPMOVSXWQZ128rrkz, X86::VPMOVSXWQZ128rmkz, 0 },
+ { X86::VPMOVZXBDZ128rrkz, X86::VPMOVZXBDZ128rmkz, 0 },
+ { X86::VPMOVZXBQZ128rrkz, X86::VPMOVZXBQZ128rmkz, 0 },
+ { X86::VPMOVZXBWZ128rrkz, X86::VPMOVZXBWZ128rmkz, 0 },
+ { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz, 0 },
+ { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz, 0 },
+ { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz, 0 },
+
// AES foldable instructions
{ X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
{ X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 },
@@ -2305,6 +2347,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
{ X86::VXORPDZ128rrkz, X86::VXORPDZ128rmkz, 0 },
{ X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 },
+
+ // AVX-512 masked foldable instructions
+ { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 },
+ { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, 0 },
+ { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 },
+ { X86::VPMOVSXDQZrrk, X86::VPMOVSXDQZrmk, 0 },
+ { X86::VPMOVSXWDZrrk, X86::VPMOVSXWDZrmk, 0 },
+ { X86::VPMOVSXWQZrrk, X86::VPMOVSXWQZrmk, 0 },
+ { X86::VPMOVZXBDZrrk, X86::VPMOVZXBDZrmk, 0 },
+ { X86::VPMOVZXBQZrrk, X86::VPMOVZXBQZrmk, 0 },
+ { X86::VPMOVZXBWZrrk, X86::VPMOVZXBWZrmk, 0 },
+ { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 },
+ { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 },
+ { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 },
+
+ // AVX-512VL 256-bit masked foldable instructions
+ { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, 0 },
+ { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, 0 },
+ { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 },
+ { X86::VPMOVSXDQZ256rrk, X86::VPMOVSXDQZ256rmk, 0 },
+ { X86::VPMOVSXWDZ256rrk, X86::VPMOVSXWDZ256rmk, 0 },
+ { X86::VPMOVSXWQZ256rrk, X86::VPMOVSXWQZ256rmk, 0 },
+ { X86::VPMOVZXBDZ256rrk, X86::VPMOVZXBDZ256rmk, 0 },
+ { X86::VPMOVZXBQZ256rrk, X86::VPMOVZXBQZ256rmk, 0 },
+ { X86::VPMOVZXBWZ256rrk, X86::VPMOVZXBWZ256rmk, 0 },
+ { X86::VPMOVZXDQZ256rrk, X86::VPMOVZXDQZ256rmk, 0 },
+ { X86::VPMOVZXWDZ256rrk, X86::VPMOVZXWDZ256rmk, 0 },
+ { X86::VPMOVZXWQZ256rrk, X86::VPMOVZXWQZ256rmk, 0 },
+
+ // AVX-512VL 128-bit masked foldable instructions
+ { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, 0 },
+ { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, 0 },
+ { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, 0 },
+ { X86::VPMOVSXDQZ128rrk, X86::VPMOVSXDQZ128rmk, 0 },
+ { X86::VPMOVSXWDZ128rrk, X86::VPMOVSXWDZ128rmk, 0 },
+ { X86::VPMOVSXWQZ128rrk, X86::VPMOVSXWQZ128rmk, 0 },
+ { X86::VPMOVZXBDZ128rrk, X86::VPMOVZXBDZ128rmk, 0 },
+ { X86::VPMOVZXBQZ128rrk, X86::VPMOVZXBQZ128rmk, 0 },
+ { X86::VPMOVZXBWZ128rrk, X86::VPMOVZXBWZ128rmk, 0 },
+ { X86::VPMOVZXDQZ128rrk, X86::VPMOVZXDQZ128rmk, 0 },
+ { X86::VPMOVZXWDZ128rrk, X86::VPMOVZXWDZ128rmk, 0 },
+ { X86::VPMOVZXWQZ128rrk, X86::VPMOVZXWQZ128rmk, 0 },
};
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=287995&r1=287994&r2=287995&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Sun Nov 27 02:55:31 2016
@@ -583,6 +583,26 @@ define <8 x i64> @stack_fold_pmovsxwq_zm
ret <8 x i64> %2
}
+define <8 x i64> @stack_fold_pmovsxwq_mask_zmm(<8 x i64> %passthru, <8 x i16> %a0, i8 %mask) {
+ ;CHECK-LABEL: stack_fold_pmovsxwq_mask_zmm
+ ;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = sext <8 x i16> %a0 to <8 x i64>
+ %3 = bitcast i8 %mask to <8 x i1>
+ %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> %passthru
+ ret <8 x i64> %4
+}
+
+define <8 x i64> @stack_fold_pmovsxwq_maskz_zmm(<8 x i16> %a0, i8 %mask) {
+ ;CHECK-LABEL: stack_fold_pmovsxwq_maskz_zmm
+ ;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = sext <8 x i16> %a0 to <8 x i64>
+ %3 = bitcast i8 %mask to <8 x i1>
+ %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
+ ret <8 x i64> %4
+}
+
define <16 x i32> @stack_fold_pmovzxbd_zmm(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovzxbd_zmm
;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
@@ -631,3 +651,23 @@ define <8 x i64> @stack_fold_pmovzxwq_zm
%2 = zext <8 x i16> %a0 to <8 x i64>
ret <8 x i64> %2
}
+
+define <8 x i64> @stack_fold_pmovzxwq_mask_zmm(<8 x i64> %passthru, <8 x i16> %a0, i8 %mask) {
+ ;CHECK-LABEL: stack_fold_pmovzxwq_mask_zmm
+ ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = zext <8 x i16> %a0 to <8 x i64>
+ %3 = bitcast i8 %mask to <8 x i1>
+ %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> %passthru
+ ret <8 x i64> %4
+}
+
+define <8 x i64> @stack_fold_pmovzxwq_maskz_zmm(<8 x i16> %a0, i8 %mask) {
+ ;CHECK-LABEL: stack_fold_pmovzxwq_maskz_zmm
+ ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = zext <8 x i16> %a0 to <8 x i64>
+ %3 = bitcast i8 %mask to <8 x i1>
+ %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
+ ret <8 x i64> %4
+}
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll?rev=287995&r1=287994&r2=287995&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll Sun Nov 27 02:55:31 2016
@@ -908,3 +908,27 @@ define <4 x i64> @stack_fold_pmovzxwq_ym
%3 = zext <4 x i16> %2 to <4 x i64>
ret <4 x i64> %3
}
+
+define <4 x i64> @stack_fold_pmovzxwq_maskz_ymm(<8 x i16> %a0, i8 %mask) {
+ ;CHECK-LABEL: stack_fold_pmovzxwq_maskz_ymm
+ ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = zext <4 x i16> %2 to <4 x i64>
+ %4 = bitcast i8 %mask to <8 x i1>
+ %5 = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> zeroinitializer
+ ret <4 x i64> %6
+}
+
+define <4 x i64> @stack_fold_pmovzxwq_mask_ymm(<4 x i64> %passthru, <8 x i16> %a0, i8 %mask) {
+ ;CHECK-LABEL: stack_fold_pmovzxwq_mask_ymm
+ ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = zext <4 x i16> %2 to <4 x i64>
+ %4 = bitcast i8 %mask to <8 x i1>
+ %5 = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %passthru
+ ret <4 x i64> %6
+}