[llvm] r305275 - [AVX-512] Mark masked version of vpcmpeq as being commutable.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 13 00:13:47 PDT 2017
Author: ctopper
Date: Tue Jun 13 02:13:47 2017
New Revision: 305275
URL: http://llvm.org/viewvc/llvm-project?rev=305275&view=rev
Log:
[AVX-512] Mark masked version of vpcmpeq as being commutable.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=305275&r1=305274&r2=305275&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Jun 13 02:13:47 2017
@@ -1631,6 +1631,7 @@ multiclass avx512_icmp_packed<bits<8> op
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2)))))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ let isCommutable = IsCommutable in
def rrk : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=305275&r1=305274&r2=305275&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Tue Jun 13 02:13:47 2017
@@ -518,6 +518,20 @@ define i16 @stack_fold_pcmpeqd_mask(<16
ret i16 %7
}
+define i16 @stack_fold_pcmpeqd_mask_commuted(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask) {
+ ;CHECK-LABEL: stack_fold_pcmpeqd_mask_commuted
+ ;CHECK: vpcmpeqd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ ; load and add are here to keep the operations below the side effecting block and to avoid folding the wrong load
+ %2 = load <16 x i32>, <16 x i32>* %a2
+ %3 = add <16 x i32> %a1, %2
+ %4 = bitcast i16 %mask to <16 x i1>
+ %5 = icmp eq <16 x i32> %a0, %3
+ %6 = and <16 x i1> %4, %5
+ %7 = bitcast <16 x i1> %6 to i16
+ ret i16 %7
+}
+
define i16 @stack_fold_pcmpled_mask(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask) {
;CHECK-LABEL: stack_fold_pcmpled_mask
;CHECK: vpcmpled {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
More information about the llvm-commits mailing list