[llvm] r305274 - [X86] Add masked integer compare instructions to load folding tables.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 13 00:13:44 PDT 2017
Author: ctopper
Date: Tue Jun 13 02:13:44 2017
New Revision: 305274
URL: http://llvm.org/viewvc/llvm-project?rev=305274&view=rev
Log:
[X86] Add masked integer compare instructions to load folding tables.
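[Editorial context, not part of the commit message.] Each three-operand entry added below pairs a register-register opcode with its register-memory counterpart plus a flags column (0, or TB_NO_REVERSE for the scalar _Intk forms, meaning the memory form must not be unfolded back to the register form). The fold tables let the compiler rewrite a masked compare into its memory form when one source operand is being reloaded from a stack spill slot. The following is a minimal standalone sketch of that mapping idea only; the enum values, TB_NO_REVERSE bit value, and lookupMemFold helper are placeholders for illustration and are not LLVM's actual X86MemoryFoldTableEntry machinery.

#include <cstdint>
#include <map>
#include <optional>

// Simplified stand-in for X86 opcodes; the numeric values are placeholders,
// not the real llvm::X86 opcode enumerators.
enum Opcode : unsigned { VPCMPEQDZrrk, VPCMPEQDZrmk, VCMPSSZrr_Intk, VCMPSSZrm_Intk };

// Flags mirroring the third column of the table entries added in this patch.
constexpr uint16_t TB_NONE = 0;
constexpr uint16_t TB_NO_REVERSE = 1 << 0; // cannot be unfolded back to the reg form

struct FoldEntry {
  Opcode MemOpcode; // register-memory form of the instruction
  uint16_t Flags;   // folding restrictions
};

// Register-form opcode -> memory-form opcode, analogous to MemoryFoldTable3.
static const std::map<Opcode, FoldEntry> FoldTable = {
    {VPCMPEQDZrrk,   {VPCMPEQDZrmk,   TB_NONE}},
    {VCMPSSZrr_Intk, {VCMPSSZrm_Intk, TB_NO_REVERSE}},
};

// If a source of a reg-reg masked compare is reloaded from a spill slot,
// look up the memory form so the reload can be folded into the compare.
std::optional<FoldEntry> lookupMemFold(Opcode RegOpcode) {
  auto It = FoldTable.find(RegOpcode);
  if (It == FoldTable.end())
    return std::nullopt;
  return It->second;
}

The stack-folding tests added further down exercise exactly this path: inline asm clobbers every xmm register to force a spill, and the CHECK lines then require the compare to consume its operand directly from the stack slot ("64-byte Folded Reload").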
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=305274&r1=305273&r2=305274&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Tue Jun 13 02:13:44 2017
@@ -3070,6 +3070,64 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 },
{ X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 },
{ X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 },
+
+ // AVX-512 masked compare instructions
+ { X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0 },
+ { X86::VCMPPSZ128rrik, X86::VCMPPSZ128rmik, 0 },
+ { X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmik, 0 },
+ { X86::VCMPPSZ256rrik, X86::VCMPPSZ256rmik, 0 },
+ { X86::VCMPPDZrrik, X86::VCMPPDZrmik, 0 },
+ { X86::VCMPPSZrrik, X86::VCMPPSZrmik, 0 },
+ { X86::VCMPSDZrr_Intk, X86::VCMPSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VCMPSSZrr_Intk, X86::VCMPSSZrm_Intk, TB_NO_REVERSE },
+ { X86::VPCMPBZ128rrik, X86::VPCMPBZ128rmik, 0 },
+ { X86::VPCMPBZ256rrik, X86::VPCMPBZ256rmik, 0 },
+ { X86::VPCMPBZrrik, X86::VPCMPBZrmik, 0 },
+ { X86::VPCMPDZ128rrik, X86::VPCMPDZ128rmik, 0 },
+ { X86::VPCMPDZ256rrik, X86::VPCMPDZ256rmik, 0 },
+ { X86::VPCMPDZrrik, X86::VPCMPDZrmik, 0 },
+ { X86::VPCMPEQBZ128rrk, X86::VPCMPEQBZ128rmk, 0 },
+ { X86::VPCMPEQBZ256rrk, X86::VPCMPEQBZ256rmk, 0 },
+ { X86::VPCMPEQBZrrk, X86::VPCMPEQBZrmk, 0 },
+ { X86::VPCMPEQDZ128rrk, X86::VPCMPEQDZ128rmk, 0 },
+ { X86::VPCMPEQDZ256rrk, X86::VPCMPEQDZ256rmk, 0 },
+ { X86::VPCMPEQDZrrk, X86::VPCMPEQDZrmk, 0 },
+ { X86::VPCMPEQQZ128rrk, X86::VPCMPEQQZ128rmk, 0 },
+ { X86::VPCMPEQQZ256rrk, X86::VPCMPEQQZ256rmk, 0 },
+ { X86::VPCMPEQQZrrk, X86::VPCMPEQQZrmk, 0 },
+ { X86::VPCMPEQWZ128rrk, X86::VPCMPEQWZ128rmk, 0 },
+ { X86::VPCMPEQWZ256rrk, X86::VPCMPEQWZ256rmk, 0 },
+ { X86::VPCMPEQWZrrk, X86::VPCMPEQWZrmk, 0 },
+ { X86::VPCMPGTBZ128rrk, X86::VPCMPGTBZ128rmk, 0 },
+ { X86::VPCMPGTBZ256rrk, X86::VPCMPGTBZ256rmk, 0 },
+ { X86::VPCMPGTBZrrk, X86::VPCMPGTBZrmk, 0 },
+ { X86::VPCMPGTDZ128rrk, X86::VPCMPGTDZ128rmk, 0 },
+ { X86::VPCMPGTDZ256rrk, X86::VPCMPGTDZ256rmk, 0 },
+ { X86::VPCMPGTDZrrk, X86::VPCMPGTDZrmk, 0 },
+ { X86::VPCMPGTQZ128rrk, X86::VPCMPGTQZ128rmk, 0 },
+ { X86::VPCMPGTQZ256rrk, X86::VPCMPGTQZ256rmk, 0 },
+ { X86::VPCMPGTQZrrk, X86::VPCMPGTQZrmk, 0 },
+ { X86::VPCMPGTWZ128rrk, X86::VPCMPGTWZ128rmk, 0 },
+ { X86::VPCMPGTWZ256rrk, X86::VPCMPGTWZ256rmk, 0 },
+ { X86::VPCMPGTWZrrk, X86::VPCMPGTWZrmk, 0 },
+ { X86::VPCMPQZ128rrik, X86::VPCMPQZ128rmik, 0 },
+ { X86::VPCMPQZ256rrik, X86::VPCMPQZ256rmik, 0 },
+ { X86::VPCMPQZrrik, X86::VPCMPQZrmik, 0 },
+ { X86::VPCMPUBZ128rrik, X86::VPCMPUBZ128rmik, 0 },
+ { X86::VPCMPUBZ256rrik, X86::VPCMPUBZ256rmik, 0 },
+ { X86::VPCMPUBZrrik, X86::VPCMPUBZrmik, 0 },
+ { X86::VPCMPUDZ128rrik, X86::VPCMPUDZ128rmik, 0 },
+ { X86::VPCMPUDZ256rrik, X86::VPCMPUDZ256rmik, 0 },
+ { X86::VPCMPUDZrrik, X86::VPCMPUDZrmik, 0 },
+ { X86::VPCMPUQZ128rrik, X86::VPCMPUQZ128rmik, 0 },
+ { X86::VPCMPUQZ256rrik, X86::VPCMPUQZ256rmik, 0 },
+ { X86::VPCMPUQZrrik, X86::VPCMPUQZrmik, 0 },
+ { X86::VPCMPUWZ128rrik, X86::VPCMPUWZ128rmik, 0 },
+ { X86::VPCMPUWZ256rrik, X86::VPCMPUWZ256rmik, 0 },
+ { X86::VPCMPUWZrrik, X86::VPCMPUWZrmik, 0 },
+ { X86::VPCMPWZ128rrik, X86::VPCMPWZ128rmik, 0 },
+ { X86::VPCMPWZ256rrik, X86::VPCMPWZ256rmik, 0 },
+ { X86::VPCMPWZrrik, X86::VPCMPWZrmik, 0 },
};
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=305274&r1=305273&r2=305274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Tue Jun 13 02:13:44 2017
@@ -504,6 +504,34 @@ define i32 @stack_fold_pcmpeqw(<32 x i16
ret i32 %3
}
+define i16 @stack_fold_pcmpeqd_mask(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask) {
+ ;CHECK-LABEL: stack_fold_pcmpeqd_mask
+ ;CHECK: vpcmpeqd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ ; load and add are here to keep the operations below the side effecting block and to avoid folding the wrong load
+ %2 = load <16 x i32>, <16 x i32>* %a2
+ %3 = add <16 x i32> %a1, %2
+ %4 = bitcast i16 %mask to <16 x i1>
+ %5 = icmp eq <16 x i32> %3, %a0
+ %6 = and <16 x i1> %4, %5
+ %7 = bitcast <16 x i1> %6 to i16
+ ret i16 %7
+}
+
+define i16 @stack_fold_pcmpled_mask(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask) {
+ ;CHECK-LABEL: stack_fold_pcmpled_mask
+ ;CHECK: vpcmpled {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ ; load and add are here to keep the operations below the side effecting block and to avoid folding the wrong load
+ %2 = load <16 x i32>, <16 x i32>* %a2
+ %3 = add <16 x i32> %a1, %2
+ %4 = bitcast i16 %mask to <16 x i1>
+ %5 = icmp sge <16 x i32> %a0, %3
+ %6 = and <16 x i1> %4, %5
+ %7 = bitcast <16 x i1> %6 to i16
+ ret i16 %7
+}
+
define <64 x i8> @stack_fold_permbvar(<64 x i8> %a0, <64 x i8> %a1) {
;CHECK-LABEL: stack_fold_permbvar
;CHECK: vpermb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload