[llvm] r287972 - [AVX-512] Add masked 512-bit integer add/sub instructions to load folding tables.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 25 23:21:00 PST 2016
Author: ctopper
Date: Sat Nov 26 01:21:00 2016
New Revision: 287972
URL: http://llvm.org/viewvc/llvm-project?rev=287972&view=rev
Log:
[AVX-512] Add masked 512-bit integer add/sub instructions to load folding tables.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=287972&r1=287971&r2=287972&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sat Nov 26 01:21:00 2016
@@ -2141,12 +2141,28 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
{ X86::VORPDZrrkz, X86::VORPDZrmkz, 0 },
{ X86::VORPSZrrkz, X86::VORPSZrmkz, 0 },
+ { X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 },
+ { X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 },
+ { X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 },
+ { X86::VPADDSBZrrkz, X86::VPADDSBZrmkz, 0 },
+ { X86::VPADDSWZrrkz, X86::VPADDSWZrmkz, 0 },
+ { X86::VPADDUSBZrrkz, X86::VPADDUSBZrmkz, 0 },
+ { X86::VPADDUSWZrrkz, X86::VPADDUSWZrmkz, 0 },
+ { X86::VPADDWZrrkz, X86::VPADDWZrmkz, 0 },
{ X86::VPANDDZrrkz, X86::VPANDDZrmkz, 0 },
{ X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 },
{ X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 },
{ X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 },
{ X86::VPORDZrrkz, X86::VPORDZrmkz, 0 },
{ X86::VPORQZrrkz, X86::VPORQZrmkz, 0 },
+ { X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 },
+ { X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 },
+ { X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 },
+ { X86::VPSUBSBZrrkz, X86::VPSUBSBZrmkz, 0 },
+ { X86::VPSUBSWZrrkz, X86::VPSUBSWZrmkz, 0 },
+ { X86::VPSUBUSBZrrkz, X86::VPSUBUSBZrmkz, 0 },
+ { X86::VPSUBUSWZrrkz, X86::VPSUBUSWZrmkz, 0 },
+ { X86::VPSUBWZrrkz, X86::VPSUBWZrmkz, 0 },
{ X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 },
{ X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 },
{ X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
@@ -2259,12 +2275,27 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
{ X86::VORPDZrrk, X86::VORPDZrmk, 0 },
{ X86::VORPSZrrk, X86::VORPSZrmk, 0 },
+ { X86::VPADDBZrrk, X86::VPADDBZrmk, 0 },
+ { X86::VPADDDZrrk, X86::VPADDDZrmk, 0 },
+ { X86::VPADDQZrrk, X86::VPADDQZrmk, 0 },
+ { X86::VPADDSBZrrk, X86::VPADDSBZrmk, 0 },
+ { X86::VPADDSWZrrk, X86::VPADDSWZrmk, 0 },
+ { X86::VPADDUSBZrrk, X86::VPADDUSBZrmk, 0 },
+ { X86::VPADDUSWZrrk, X86::VPADDUSWZrmk, 0 },
+ { X86::VPADDWZrrk, X86::VPADDWZrmk, 0 },
{ X86::VPANDDZrrk, X86::VPANDDZrmk, 0 },
{ X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 },
{ X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 },
{ X86::VPANDQZrrk, X86::VPANDQZrmk, 0 },
{ X86::VPORDZrrk, X86::VPORDZrmk, 0 },
{ X86::VPORQZrrk, X86::VPORQZrmk, 0 },
+ { X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 },
+ { X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 },
+ { X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 },
+ { X86::VPSUBSBZrrk, X86::VPSUBSBZrmk, 0 },
+ { X86::VPSUBSWZrrk, X86::VPSUBSWZrmk, 0 },
+ { X86::VPSUBUSBZrrk, X86::VPSUBUSBZrmk, 0 },
+ { X86::VPSUBUSWZrrk, X86::VPSUBUSWZrmk, 0 },
{ X86::VPTERNLOGDZrrik, X86::VPTERNLOGDZrmik, 0 },
{ X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 },
{ X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmik, 0 },
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=287972&r1=287971&r2=287972&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Sat Nov 26 01:21:00 2016
@@ -16,6 +16,28 @@ define <64 x i8> @stack_fold_paddb(<64 x
ret <64 x i8> %2
}
+define <64 x i8> @stack_fold_paddb_mask(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %a2, i64 %mask) {
+ ;CHECK-LABEL: stack_fold_paddb_mask
+ ;CHECK: vpaddb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm0},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = add <64 x i8> %a0, %a1
+ %3 = bitcast i64 %mask to <64 x i1>
+ ; load needed to keep the operation from being scheduled about the asm block
+ %4 = load <64 x i8>, <64 x i8>* %a2
+ %5 = select <64 x i1> %3, <64 x i8> %2, <64 x i8> %4
+ ret <64 x i8> %5
+}
+
+define <64 x i8> @stack_fold_paddb_maskz(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
+ ;CHECK-LABEL: stack_fold_paddb_maskz
+ ;CHECK: vpaddb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = add <64 x i8> %a0, %a1
+ %3 = bitcast i64 %mask to <64 x i1>
+ %4 = select <64 x i1> %3, <64 x i8> %2, <64 x i8> zeroinitializer
+ ret <64 x i8> %4
+}
+
define <16 x i32> @stack_fold_paddd(<16 x i32> %a0, <16 x i32> %a1) {
;CHECK-LABEL: stack_fold_paddd
;CHECK: vpaddd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
More information about the llvm-commits
mailing list