[llvm] r287995 - [AVX-512] Add masked EVEX vpmovzx/sx instructions to load folding tables.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 27 00:55:32 PST 2016


Author: ctopper
Date: Sun Nov 27 02:55:31 2016
New Revision: 287995

URL: http://llvm.org/viewvc/llvm-project?rev=287995&view=rev
Log:
[AVX-512] Add masked EVEX vpmovzx/sx instructions to load folding tables.
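
Background for readers outside the backend: X86InstrInfo.cpp builds static
tables of { RegOp, MemOp, Flags } triples (X86MemoryFoldTableEntry) that map
a register-to-register opcode to its memory-operand twin. When a spilled
value feeds such an instruction, foldMemoryOperand can consult the table and
merge the stack reload into the instruction itself. A minimal sketch of what
one of the new entries expresses -- the layout is paraphrased from this
file, not copied verbatim:

  // Sketch only: the meaning of a fold-table entry added in this patch.
  struct FoldEntrySketch {
    unsigned RegOp;  // register form, e.g. X86::VPMOVSXWQZrrkz
    unsigned MemOp;  // memory form,   e.g. X86::VPMOVSXWQZrmkz
    unsigned Flags;  // 0 here: EVEX loads carry no 16-byte alignment
                     // requirement, unlike the TB_ALIGN_16 legacy entries
  };

  // Effect of a successful fold (AT&T syntax, illustrative):
  //   vmovaps 16(%rsp), %xmm1              <- reload
  //   vpmovsxwq %xmm1, %zmm0 {%k1} {z}     <- extend
  // becomes the single memory form:
  //   vpmovsxwq 16(%rsp), %zmm0 {%k1} {z}  <- folded reload + extend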

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
    llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=287995&r1=287994&r2=287995&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Nov 27 02:55:31 2016
@@ -2028,6 +2028,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VXORPSZ128rr,      X86::VXORPSZ128rm,        0 },
     { X86::VXORPSZ256rr,      X86::VXORPSZ256rm,        0 },
 
+    // AVX-512 masked foldable instructions
+    { X86::VPMOVSXBDZrrkz,    X86::VPMOVSXBDZrmkz,      0 },
+    { X86::VPMOVSXBQZrrkz,    X86::VPMOVSXBQZrmkz,      0 },
+    { X86::VPMOVSXBWZrrkz,    X86::VPMOVSXBWZrmkz,      0 },
+    { X86::VPMOVSXDQZrrkz,    X86::VPMOVSXDQZrmkz,      0 },
+    { X86::VPMOVSXWDZrrkz,    X86::VPMOVSXWDZrmkz,      0 },
+    { X86::VPMOVSXWQZrrkz,    X86::VPMOVSXWQZrmkz,      0 },
+    { X86::VPMOVZXBDZrrkz,    X86::VPMOVZXBDZrmkz,      0 },
+    { X86::VPMOVZXBQZrrkz,    X86::VPMOVZXBQZrmkz,      0 },
+    { X86::VPMOVZXBWZrrkz,    X86::VPMOVZXBWZrmkz,      0 },
+    { X86::VPMOVZXDQZrrkz,    X86::VPMOVZXDQZrmkz,      0 },
+    { X86::VPMOVZXWDZrrkz,    X86::VPMOVZXWDZrmkz,      0 },
+    { X86::VPMOVZXWQZrrkz,    X86::VPMOVZXWQZrmkz,      0 },
+
+    // AVX-512VL 256-bit masked foldable instructions
+    { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz,   0 },
+    { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz,   0 },
+    { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz,   0 },
+    { X86::VPMOVSXDQZ256rrkz, X86::VPMOVSXDQZ256rmkz,   0 },
+    { X86::VPMOVSXWDZ256rrkz, X86::VPMOVSXWDZ256rmkz,   0 },
+    { X86::VPMOVSXWQZ256rrkz, X86::VPMOVSXWQZ256rmkz,   0 },
+    { X86::VPMOVZXBDZ256rrkz, X86::VPMOVZXBDZ256rmkz,   0 },
+    { X86::VPMOVZXBQZ256rrkz, X86::VPMOVZXBQZ256rmkz,   0 },
+    { X86::VPMOVZXBWZ256rrkz, X86::VPMOVZXBWZ256rmkz,   0 },
+    { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz,   0 },
+    { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz,   0 },
+    { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz,   0 },
+
+    // AVX-512VL 128-bit masked foldable instructions
+    { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz,   0 },
+    { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz,   0 },
+    { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz,   0 },
+    { X86::VPMOVSXDQZ128rrkz, X86::VPMOVSXDQZ128rmkz,   0 },
+    { X86::VPMOVSXWDZ128rrkz, X86::VPMOVSXWDZ128rmkz,   0 },
+    { X86::VPMOVSXWQZ128rrkz, X86::VPMOVSXWQZ128rmkz,   0 },
+    { X86::VPMOVZXBDZ128rrkz, X86::VPMOVZXBDZ128rmkz,   0 },
+    { X86::VPMOVZXBQZ128rrkz, X86::VPMOVZXBQZ128rmkz,   0 },
+    { X86::VPMOVZXBWZ128rrkz, X86::VPMOVZXBWZ128rmkz,   0 },
+    { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz,   0 },
+    { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz,   0 },
+    { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz,   0 },
+
     // AES foldable instructions
     { X86::AESDECLASTrr,      X86::AESDECLASTrm,        TB_ALIGN_16 },
     { X86::AESDECrr,          X86::AESDECrm,            TB_ALIGN_16 },
@@ -2305,6 +2347,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VSUBPSZ128rrkz,        X86::VSUBPSZ128rmkz,        0 },
     { X86::VXORPDZ128rrkz,        X86::VXORPDZ128rmkz,        0 },
     { X86::VXORPSZ128rrkz,        X86::VXORPSZ128rmkz,        0 },
+
+    // AVX-512 masked foldable instructions
+    { X86::VPMOVSXBDZrrk,         X86::VPMOVSXBDZrmk,         0 },
+    { X86::VPMOVSXBQZrrk,         X86::VPMOVSXBQZrmk,         0 },
+    { X86::VPMOVSXBWZrrk,         X86::VPMOVSXBWZrmk,         0 },
+    { X86::VPMOVSXDQZrrk,         X86::VPMOVSXDQZrmk,         0 },
+    { X86::VPMOVSXWDZrrk,         X86::VPMOVSXWDZrmk,         0 },
+    { X86::VPMOVSXWQZrrk,         X86::VPMOVSXWQZrmk,         0 },
+    { X86::VPMOVZXBDZrrk,         X86::VPMOVZXBDZrmk,         0 },
+    { X86::VPMOVZXBQZrrk,         X86::VPMOVZXBQZrmk,         0 },
+    { X86::VPMOVZXBWZrrk,         X86::VPMOVZXBWZrmk,         0 },
+    { X86::VPMOVZXDQZrrk,         X86::VPMOVZXDQZrmk,         0 },
+    { X86::VPMOVZXWDZrrk,         X86::VPMOVZXWDZrmk,         0 },
+    { X86::VPMOVZXWQZrrk,         X86::VPMOVZXWQZrmk,         0 },
+
+    // AVX-512VL 256-bit masked foldable instructions
+    { X86::VPMOVSXBDZ256rrk,      X86::VPMOVSXBDZ256rmk,      0 },
+    { X86::VPMOVSXBQZ256rrk,      X86::VPMOVSXBQZ256rmk,      0 },
+    { X86::VPMOVSXBWZ256rrk,      X86::VPMOVSXBWZ256rmk,      0 },
+    { X86::VPMOVSXDQZ256rrk,      X86::VPMOVSXDQZ256rmk,      0 },
+    { X86::VPMOVSXWDZ256rrk,      X86::VPMOVSXWDZ256rmk,      0 },
+    { X86::VPMOVSXWQZ256rrk,      X86::VPMOVSXWQZ256rmk,      0 },
+    { X86::VPMOVZXBDZ256rrk,      X86::VPMOVZXBDZ256rmk,      0 },
+    { X86::VPMOVZXBQZ256rrk,      X86::VPMOVZXBQZ256rmk,      0 },
+    { X86::VPMOVZXBWZ256rrk,      X86::VPMOVZXBWZ256rmk,      0 },
+    { X86::VPMOVZXDQZ256rrk,      X86::VPMOVZXDQZ256rmk,      0 },
+    { X86::VPMOVZXWDZ256rrk,      X86::VPMOVZXWDZ256rmk,      0 },
+    { X86::VPMOVZXWQZ256rrk,      X86::VPMOVZXWQZ256rmk,      0 },
+
+    // AVX-512VL 128-bit masked foldable instructions
+    { X86::VPMOVSXBDZ128rrk,      X86::VPMOVSXBDZ128rmk,      0 },
+    { X86::VPMOVSXBQZ128rrk,      X86::VPMOVSXBQZ128rmk,      0 },
+    { X86::VPMOVSXBWZ128rrk,      X86::VPMOVSXBWZ128rmk,      0 },
+    { X86::VPMOVSXDQZ128rrk,      X86::VPMOVSXDQZ128rmk,      0 },
+    { X86::VPMOVSXWDZ128rrk,      X86::VPMOVSXWDZ128rmk,      0 },
+    { X86::VPMOVSXWQZ128rrk,      X86::VPMOVSXWQZ128rmk,      0 },
+    { X86::VPMOVZXBDZ128rrk,      X86::VPMOVZXBDZ128rmk,      0 },
+    { X86::VPMOVZXBQZ128rrk,      X86::VPMOVZXBQZ128rmk,      0 },
+    { X86::VPMOVZXBWZ128rrk,      X86::VPMOVZXBWZ128rmk,      0 },
+    { X86::VPMOVZXDQZ128rrk,      X86::VPMOVZXDQZ128rmk,      0 },
+    { X86::VPMOVZXWDZ128rrk,      X86::VPMOVZXWDZ128rmk,      0 },
+    { X86::VPMOVZXWQZ128rrk,      X86::VPMOVZXWQZ128rmk,      0 },
   };
 
   for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
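
Why the new entries are split between the two hunks above: the zero-masking
(kz) variants read only the mask and the source, so the memory source sits
in the second operand slot and the entries belong in the two-operand fold
table (first hunk). The merge-masking (k) variants additionally read the
destination register as a passthru value for the masked-off lanes; that
extra operand pushes the memory source into the third slot, which is why
they are registered here instead -- the loop over MemoryFoldTable3 visible
at the end of this hunk is the table being extended. The difference,
illustrated in AT&T syntax:

  // Zero-masking: destination is write-only; masked-off lanes become 0.
  //   vpmovsxwq (%rsp), %zmm0 {%k1} {z}
  // Merge-masking: destination is read and written; masked-off lanes
  // keep the previous contents of %zmm0 (the passthru).
  //   vpmovsxwq (%rsp), %zmm0 {%k1}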

Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=287995&r1=287994&r2=287995&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Sun Nov 27 02:55:31 2016
@@ -583,6 +583,26 @@ define <8 x i64> @stack_fold_pmovsxwq_zm
   ret <8 x i64> %2
 }
 
+define <8 x i64> @stack_fold_pmovsxwq_mask_zmm(<8 x i64> %passthru, <8 x i16> %a0, i8 %mask) {
+  ;CHECK-LABEL: stack_fold_pmovsxwq_mask_zmm
+  ;CHECK:       vpmovsxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = sext <8 x i16> %a0 to <8 x i64>
+  %3 = bitcast i8 %mask to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> %passthru
+  ret <8 x i64> %4
+}
+
+define <8 x i64> @stack_fold_pmovsxwq_maskz_zmm(<8 x i16> %a0, i8 %mask) {
+  ;CHECK-LABEL: stack_fold_pmovsxwq_maskz_zmm
+  ;CHECK:       vpmovsxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = sext <8 x i16> %a0 to <8 x i64>
+  %3 = bitcast i8 %mask to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
+  ret <8 x i64> %4
+}
+
 define <16 x i32> @stack_fold_pmovzxbd_zmm(<16 x i8> %a0) {
   ;CHECK-LABEL: stack_fold_pmovzxbd_zmm
   ;CHECK:       vpmovzxbd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
@@ -631,3 +651,23 @@ define <8 x i64> @stack_fold_pmovzxwq_zm
   %2 = zext <8 x i16> %a0 to <8 x i64>
   ret <8 x i64> %2
 }
+
+define <8 x i64> @stack_fold_pmovzxwq_mask_zmm(<8 x i64> %passthru, <8 x i16> %a0, i8 %mask) {
+  ;CHECK-LABEL: stack_fold_pmovzxwq_mask_zmm
+  ;CHECK:       vpmovzxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = zext <8 x i16> %a0 to <8 x i64>
+  %3 = bitcast i8 %mask to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> %passthru
+  ret <8 x i64> %4
+}
+
+define <8 x i64> @stack_fold_pmovzxwq_maskz_zmm(<8 x i16> %a0, i8 %mask) {
+  ;CHECK-LABEL: stack_fold_pmovzxwq_maskz_zmm
+  ;CHECK:       vpmovzxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = zext <8 x i16> %a0 to <8 x i64>
+  %3 = bitcast i8 %mask to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
+  ret <8 x i64> %4
+}
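
For context on the test idiom used above: the inline asm "nop" with its
large clobber list prevents the vector arguments from staying in registers
across that point, so the 128-bit source of the extend has to be spilled
and then reloaded from a stack slot when it is finally used. The CHECK
lines then require that the reload was folded into the masked
vpmovsx/vpmovzx itself (the "16-byte Folded Reload" asm comment) rather
than performed by a separate move. The RUN line at the top of the file
(not shown in this diff) drives the usual llc-into-FileCheck flow for
these stack-folding tests.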

Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll?rev=287995&r1=287994&r2=287995&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll Sun Nov 27 02:55:31 2016
@@ -908,3 +908,27 @@ define <4 x i64> @stack_fold_pmovzxwq_ym
   %3 = zext <4 x i16> %2 to <4 x i64>
   ret <4 x i64> %3
 }
+
+define <4 x i64> @stack_fold_pmovzxwq_maskz_ymm(<8 x i16> %a0, i8 %mask) {
+  ;CHECK-LABEL: stack_fold_pmovzxwq_maskz_ymm
+  ;CHECK:       vpmovzxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %3 = zext <4 x i16> %2 to <4 x i64>
+  %4 = bitcast i8 %mask to <8 x i1>
+  %5 = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> zeroinitializer
+  ret <4 x i64> %6
+}
+
+define <4 x i64> @stack_fold_pmovzxwq_mask_ymm(<4 x i64> %passthru, <8 x i16> %a0, i8 %mask) {
+  ;CHECK-LABEL: stack_fold_pmovzxwq_mask_ymm
+  ;CHECK:       vpmovzxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %3 = zext <4 x i16> %2 to <4 x i64>
+  %4 = bitcast i8 %mask to <8 x i1>
+  %5 = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %passthru
+  ret <4 x i64> %6
+}
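
One wrinkle specific to these 256-bit tests: vpmovzxwq with a ymm result
only consumes the low four i16 elements, so the IR first trims both the
input vector and the i8 mask with a shufflevector before the zext and the
select. This is the shape the backend pattern-matches into a masked
extend, and it keeps the semantics exact: only the low four mask bits
participate in the select.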
