[llvm] r305276 - [AVX-512] Mark masked VPCMP instructions as commutable.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 13 00:13:50 PDT 2017
Author: ctopper
Date: Tue Jun 13 02:13:50 2017
New Revision: 305276
URL: http://llvm.org/viewvc/llvm-project?rev=305276&view=rev
Log:
[AVX-512] Mark masked VPCMP instructions as commutable.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=305276&r1=305275&r2=305276&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Jun 13 02:13:50 2017
@@ -1765,6 +1765,7 @@ multiclass avx512_icmp_cc<bits<8> opc, s
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
AVX512ICC:$cc),
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=305276&r1=305275&r2=305276&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Tue Jun 13 02:13:50 2017
@@ -5230,20 +5230,32 @@ MachineInstr *X86InstrInfo::commuteInstr
return nullptr;
}
}
- case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
- case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
- case X86::VPCMPBZrri: case X86::VPCMPUBZrri:
- case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
- case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
- case X86::VPCMPDZrri: case X86::VPCMPUDZrri:
- case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
- case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
- case X86::VPCMPQZrri: case X86::VPCMPUQZrri:
- case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
- case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
- case X86::VPCMPWZrri: case X86::VPCMPUWZrri: {
+ case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
+ case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
+ case X86::VPCMPBZrri: case X86::VPCMPUBZrri:
+ case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
+ case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
+ case X86::VPCMPDZrri: case X86::VPCMPUDZrri:
+ case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
+ case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
+ case X86::VPCMPQZrri: case X86::VPCMPUQZrri:
+ case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
+ case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
+ case X86::VPCMPWZrri: case X86::VPCMPUWZrri:
+ case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik:
+ case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik:
+ case X86::VPCMPBZrrik: case X86::VPCMPUBZrrik:
+ case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik:
+ case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik:
+ case X86::VPCMPDZrrik: case X86::VPCMPUDZrrik:
+ case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik:
+ case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik:
+ case X86::VPCMPQZrrik: case X86::VPCMPUQZrrik:
+ case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik:
+ case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik:
+ case X86::VPCMPWZrrik: case X86::VPCMPUWZrrik: {
// Flip comparison mode immediate (if necessary).
- unsigned Imm = MI.getOperand(3).getImm() & 0x7;
+ unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7;
switch (Imm) {
default: llvm_unreachable("Unreachable!");
case 0x01: Imm = 0x06; break; // LT -> NLE
@@ -5257,7 +5269,7 @@ MachineInstr *X86InstrInfo::commuteInstr
break;
}
auto &WorkingMI = cloneIfNew(MI);
- WorkingMI.getOperand(3).setImm(Imm);
+ WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=305276&r1=305275&r2=305276&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Tue Jun 13 02:13:50 2017
@@ -546,6 +546,19 @@ define i16 @stack_fold_pcmpled_mask(<16
ret i16 %7
}
+define i16 @stack_fold_pcmpleud(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask) {
+ ;CHECK-LABEL: stack_fold_pcmpleud
+ ;CHECK: vpcmpleud {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{.*#+}} 64-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = load <16 x i32>, <16 x i32>* %a2
+ %3 = add <16 x i32> %a1, %2
+ %4 = bitcast i16 %mask to <16 x i1>
+ %5 = icmp uge <16 x i32> %a0, %3
+ %6 = and <16 x i1> %5, %4
+ %7 = bitcast <16 x i1> %6 to i16
+ ret i16 %7
+}
+
define <64 x i8> @stack_fold_permbvar(<64 x i8> %a0, <64 x i8> %a1) {
;CHECK-LABEL: stack_fold_permbvar
;CHECK: vpermb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
More information about the llvm-commits mailing list