[llvm] r294286 - [AVX-512] Add masked shift instructions to load folding tables.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 6 23:30:57 PST 2017
Author: ctopper
Date: Tue Feb 7 01:30:57 2017
New Revision: 294286
URL: http://llvm.org/viewvc/llvm-project?rev=294286&view=rev
Log:
[AVX-512] Add masked shift instructions to load folding tables.
This adds the masked versions of everything, but the shift by immediate instructions.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=294286&r1=294285&r2=294286&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Tue Feb 7 01:30:57 2017
@@ -2478,6 +2478,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPORDZrrkz, X86::VPORDZrmkz, 0 },
{ X86::VPORQZrrkz, X86::VPORQZrmkz, 0 },
{ X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 },
+ { X86::VPSLLDZrrkz, X86::VPSLLDZrmkz, 0 },
+ { X86::VPSLLQZrrkz, X86::VPSLLQZrmkz, 0 },
+ { X86::VPSLLVDZrrkz, X86::VPSLLVDZrmkz, 0 },
+ { X86::VPSLLVQZrrkz, X86::VPSLLVQZrmkz, 0 },
+ { X86::VPSLLVWZrrkz, X86::VPSLLVWZrmkz, 0 },
+ { X86::VPSLLWZrrkz, X86::VPSLLWZrmkz, 0 },
+ { X86::VPSRADZrrkz, X86::VPSRADZrmkz, 0 },
+ { X86::VPSRAQZrrkz, X86::VPSRAQZrmkz, 0 },
+ { X86::VPSRAVDZrrkz, X86::VPSRAVDZrmkz, 0 },
+ { X86::VPSRAVQZrrkz, X86::VPSRAVQZrmkz, 0 },
+ { X86::VPSRAVWZrrkz, X86::VPSRAVWZrmkz, 0 },
+ { X86::VPSRAWZrrkz, X86::VPSRAWZrmkz, 0 },
+ { X86::VPSRLDZrrkz, X86::VPSRLDZrmkz, 0 },
+ { X86::VPSRLQZrrkz, X86::VPSRLQZrmkz, 0 },
+ { X86::VPSRLVDZrrkz, X86::VPSRLVDZrmkz, 0 },
+ { X86::VPSRLVQZrrkz, X86::VPSRLVQZrmkz, 0 },
+ { X86::VPSRLVWZrrkz, X86::VPSRLVWZrmkz, 0 },
+ { X86::VPSRLWZrrkz, X86::VPSRLWZrmkz, 0 },
{ X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 },
{ X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 },
{ X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 },
@@ -2567,6 +2585,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 },
{ X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 },
{ X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 },
+ { X86::VPSLLDZ256rrkz, X86::VPSLLDZ256rmkz, 0 },
+ { X86::VPSLLQZ256rrkz, X86::VPSLLQZ256rmkz, 0 },
+ { X86::VPSLLVDZ256rrkz, X86::VPSLLVDZ256rmkz, 0 },
+ { X86::VPSLLVQZ256rrkz, X86::VPSLLVQZ256rmkz, 0 },
+ { X86::VPSLLVWZ256rrkz, X86::VPSLLVWZ256rmkz, 0 },
+ { X86::VPSLLWZ256rrkz, X86::VPSLLWZ256rmkz, 0 },
+ { X86::VPSRADZ256rrkz, X86::VPSRADZ256rmkz, 0 },
+ { X86::VPSRAQZ256rrkz, X86::VPSRAQZ256rmkz, 0 },
+ { X86::VPSRAVDZ256rrkz, X86::VPSRAVDZ256rmkz, 0 },
+ { X86::VPSRAVQZ256rrkz, X86::VPSRAVQZ256rmkz, 0 },
+ { X86::VPSRAVWZ256rrkz, X86::VPSRAVWZ256rmkz, 0 },
+ { X86::VPSRAWZ256rrkz, X86::VPSRAWZ256rmkz, 0 },
+ { X86::VPSRLDZ256rrkz, X86::VPSRLDZ256rmkz, 0 },
+ { X86::VPSRLQZ256rrkz, X86::VPSRLQZ256rmkz, 0 },
+ { X86::VPSRLVDZ256rrkz, X86::VPSRLVDZ256rmkz, 0 },
+ { X86::VPSRLVQZ256rrkz, X86::VPSRLVQZ256rmkz, 0 },
+ { X86::VPSRLVWZ256rrkz, X86::VPSRLVWZ256rmkz, 0 },
+ { X86::VPSRLWZ256rrkz, X86::VPSRLWZ256rmkz, 0 },
{ X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 },
{ X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 },
{ X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 },
@@ -2646,6 +2682,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 },
{ X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 },
{ X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 },
+ { X86::VPSLLDZ128rrkz, X86::VPSLLDZ128rmkz, 0 },
+ { X86::VPSLLQZ128rrkz, X86::VPSLLQZ128rmkz, 0 },
+ { X86::VPSLLVDZ128rrkz, X86::VPSLLVDZ128rmkz, 0 },
+ { X86::VPSLLVQZ128rrkz, X86::VPSLLVQZ128rmkz, 0 },
+ { X86::VPSLLVWZ128rrkz, X86::VPSLLVWZ128rmkz, 0 },
+ { X86::VPSLLWZ128rrkz, X86::VPSLLWZ128rmkz, 0 },
+ { X86::VPSRADZ128rrkz, X86::VPSRADZ128rmkz, 0 },
+ { X86::VPSRAQZ128rrkz, X86::VPSRAQZ128rmkz, 0 },
+ { X86::VPSRAVDZ128rrkz, X86::VPSRAVDZ128rmkz, 0 },
+ { X86::VPSRAVQZ128rrkz, X86::VPSRAVQZ128rmkz, 0 },
+ { X86::VPSRAVWZ128rrkz, X86::VPSRAVWZ128rmkz, 0 },
+ { X86::VPSRAWZ128rrkz, X86::VPSRAWZ128rmkz, 0 },
+ { X86::VPSRLDZ128rrkz, X86::VPSRLDZ128rmkz, 0 },
+ { X86::VPSRLQZ128rrkz, X86::VPSRLQZ128rmkz, 0 },
+ { X86::VPSRLVDZ128rrkz, X86::VPSRLVDZ128rmkz, 0 },
+ { X86::VPSRLVQZ128rrkz, X86::VPSRLVQZ128rmkz, 0 },
+ { X86::VPSRLVWZ128rrkz, X86::VPSRLVWZ128rmkz, 0 },
+ { X86::VPSRLWZ128rrkz, X86::VPSRLWZ128rmkz, 0 },
{ X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 },
{ X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 },
{ X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 },
@@ -2862,6 +2916,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPORDZrrk, X86::VPORDZrmk, 0 },
{ X86::VPORQZrrk, X86::VPORQZrmk, 0 },
{ X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 },
+ { X86::VPSLLDZrrk, X86::VPSLLDZrmk, 0 },
+ { X86::VPSLLQZrrk, X86::VPSLLQZrmk, 0 },
+ { X86::VPSLLVDZrrk, X86::VPSLLVDZrmk, 0 },
+ { X86::VPSLLVQZrrk, X86::VPSLLVQZrmk, 0 },
+ { X86::VPSLLVWZrrk, X86::VPSLLVWZrmk, 0 },
+ { X86::VPSLLWZrrk, X86::VPSLLWZrmk, 0 },
+ { X86::VPSRADZrrk, X86::VPSRADZrmk, 0 },
+ { X86::VPSRAQZrrk, X86::VPSRAQZrmk, 0 },
+ { X86::VPSRAVDZrrk, X86::VPSRAVDZrmk, 0 },
+ { X86::VPSRAVQZrrk, X86::VPSRAVQZrmk, 0 },
+ { X86::VPSRAVWZrrk, X86::VPSRAVWZrmk, 0 },
+ { X86::VPSRAWZrrk, X86::VPSRAWZrmk, 0 },
+ { X86::VPSRLDZrrk, X86::VPSRLDZrmk, 0 },
+ { X86::VPSRLQZrrk, X86::VPSRLQZrmk, 0 },
+ { X86::VPSRLVDZrrk, X86::VPSRLVDZrmk, 0 },
+ { X86::VPSRLVQZrrk, X86::VPSRLVQZrmk, 0 },
+ { X86::VPSRLVWZrrk, X86::VPSRLVWZrmk, 0 },
+ { X86::VPSRLWZrrk, X86::VPSRLWZrmk, 0 },
{ X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 },
{ X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 },
{ X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 },
@@ -2964,6 +3036,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 },
{ X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 },
{ X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 },
+ { X86::VPSLLDZ256rrk, X86::VPSLLDZ256rmk, 0 },
+ { X86::VPSLLQZ256rrk, X86::VPSLLQZ256rmk, 0 },
+ { X86::VPSLLVDZ256rrk, X86::VPSLLVDZ256rmk, 0 },
+ { X86::VPSLLVQZ256rrk, X86::VPSLLVQZ256rmk, 0 },
+ { X86::VPSLLVWZ256rrk, X86::VPSLLVWZ256rmk, 0 },
+ { X86::VPSLLWZ256rrk, X86::VPSLLWZ256rmk, 0 },
+ { X86::VPSRADZ256rrk, X86::VPSRADZ256rmk, 0 },
+ { X86::VPSRAQZ256rrk, X86::VPSRAQZ256rmk, 0 },
+ { X86::VPSRAVDZ256rrk, X86::VPSRAVDZ256rmk, 0 },
+ { X86::VPSRAVQZ256rrk, X86::VPSRAVQZ256rmk, 0 },
+ { X86::VPSRAVWZ256rrk, X86::VPSRAVWZ256rmk, 0 },
+ { X86::VPSRAWZ256rrk, X86::VPSRAWZ256rmk, 0 },
+ { X86::VPSRLDZ256rrk, X86::VPSRLDZ256rmk, 0 },
+ { X86::VPSRLQZ256rrk, X86::VPSRLQZ256rmk, 0 },
+ { X86::VPSRLVDZ256rrk, X86::VPSRLVDZ256rmk, 0 },
+ { X86::VPSRLVQZ256rrk, X86::VPSRLVQZ256rmk, 0 },
+ { X86::VPSRLVWZ256rrk, X86::VPSRLVWZ256rmk, 0 },
+ { X86::VPSRLWZ256rrk, X86::VPSRLWZ256rmk, 0 },
{ X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 },
{ X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 },
{ X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 },
@@ -3057,6 +3147,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 },
{ X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 },
{ X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 },
+ { X86::VPSLLDZ128rrk, X86::VPSLLDZ128rmk, 0 },
+ { X86::VPSLLQZ128rrk, X86::VPSLLQZ128rmk, 0 },
+ { X86::VPSLLVDZ128rrk, X86::VPSLLVDZ128rmk, 0 },
+ { X86::VPSLLVQZ128rrk, X86::VPSLLVQZ128rmk, 0 },
+ { X86::VPSLLVWZ128rrk, X86::VPSLLVWZ128rmk, 0 },
+ { X86::VPSLLWZ128rrk, X86::VPSLLWZ128rmk, 0 },
+ { X86::VPSRADZ128rrk, X86::VPSRADZ128rmk, 0 },
+ { X86::VPSRAQZ128rrk, X86::VPSRAQZ128rmk, 0 },
+ { X86::VPSRAVDZ128rrk, X86::VPSRAVDZ128rmk, 0 },
+ { X86::VPSRAVQZ128rrk, X86::VPSRAVQZ128rmk, 0 },
+ { X86::VPSRAVWZ128rrk, X86::VPSRAVWZ128rmk, 0 },
+ { X86::VPSRAWZ128rrk, X86::VPSRAWZ128rmk, 0 },
+ { X86::VPSRLDZ128rrk, X86::VPSRLDZ128rmk, 0 },
+ { X86::VPSRLQZ128rrk, X86::VPSRLQZ128rmk, 0 },
+ { X86::VPSRLVDZ128rrk, X86::VPSRLVDZ128rmk, 0 },
+ { X86::VPSRLVQZ128rrk, X86::VPSRLVQZ128rmk, 0 },
+ { X86::VPSRLVWZ128rrk, X86::VPSRLVWZ128rmk, 0 },
+ { X86::VPSRLWZ128rrk, X86::VPSRLWZ128rmk, 0 },
{ X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 },
{ X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 },
{ X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 },
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=294286&r1=294285&r2=294286&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Tue Feb 7 01:30:57 2017
@@ -1047,6 +1047,27 @@ define <16 x i32> @stack_fold_pslld(<16
}
declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) nounwind readnone
+define <16 x i32> @stack_fold_pslld_mask(<16 x i32>* %passthru, <16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
+ ;CHECK-LABEL: stack_fold_pslld_mask
+ ;CHECK: vpslld {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1)
+ %3 = bitcast i16 %mask to <16 x i1>
+ %4 = load <16 x i32>, <16 x i32>* %passthru
+ %5 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> %4
+ ret <16 x i32> %5
+}
+
+define <16 x i32> @stack_fold_pslld_maskz(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
+ ;CHECK-LABEL: stack_fold_pslld_maskz
+ ;CHECK: vpslld {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1)
+ %3 = bitcast i16 %mask to <16 x i1>
+ %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
+ ret <16 x i32> %4
+}
+
define <64 x i8> @stack_fold_pslldq(<64 x i8> %a, <64 x i8> %b) {
;CHECK-LABEL: stack_fold_pslldq
;CHECK: vpslldq $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
@@ -1073,6 +1094,27 @@ define <16 x i32> @stack_fold_psllvd(<16
}
declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) nounwind readnone
+define <16 x i32> @stack_fold_psllvd_mask(<16 x i32>* %passthru, <16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+ ;CHECK-LABEL: stack_fold_psllvd_mask
+ ;CHECK: vpsllvd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
+ %3 = bitcast i16 %mask to <16 x i1>
+ %4 = load <16 x i32>, <16 x i32>* %passthru
+ %5 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> %4
+ ret <16 x i32> %5
+}
+
+define <16 x i32> @stack_fold_psllvd_maskz(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+ ;CHECK-LABEL: stack_fold_psllvd_maskz
+ ;CHECK: vpsllvd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
+ %3 = bitcast i16 %mask to <16 x i1>
+ %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
+ ret <16 x i32> %4
+}
+
define <8 x i64> @stack_fold_psllvq(<8 x i64> %a0, <8 x i64> %a1) {
;CHECK-LABEL: stack_fold_psllvq
;CHECK: vpsllvq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
More information about the llvm-commits
mailing list