[llvm] r295579 - [X86][XOP] Reduce the size of a multiclass by moving more stuff to parameters instead of doing 128-bit and 256-bit simultaneously.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 18 14:53:44 PST 2017
Author: ctopper
Date: Sat Feb 18 16:53:43 2017
New Revision: 295579
URL: http://llvm.org/viewvc/llvm-project?rev=295579&view=rev
Log:
[X86][XOP] Reduce the size of a multiclass by moving more stuff to parameters instead of doing 128-bit and 256-bit simultaneously.
This requires some instructions to be renamed to move the Y earlier in the instruction name (e.g. VPERMIL2PDrrY becomes VPERMIL2PDYrr). The new names are more consistent with other instructions.
Modified:
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrXOP.td
llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=295579&r1=295578&r2=295579&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sat Feb 18 16:53:43 2017
@@ -239,6 +239,7 @@ def X86vpermil2 : SDNode<"X86ISD::VPERMI
SDTCisSameAs<0,2>,
SDTCisSameSizeAs<0,3>,
SDTCisSameNumEltsAs<0, 3>,
+ SDTCisFP<0>, SDTCisInt<3>,
SDTCisVT<4, i8>]>>;
def X86vpperm : SDNode<"X86ISD::VPPERM",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=295579&r1=295578&r2=295579&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sat Feb 18 16:53:43 2017
@@ -1787,9 +1787,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPCOMUQri, X86::VPCOMUQmi, 0 },
{ X86::VPCOMUWri, X86::VPCOMUWmi, 0 },
{ X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 },
- { X86::VPERMIL2PDrrY, X86::VPERMIL2PDmrY, 0 },
+ { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYmr, 0 },
{ X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 },
- { X86::VPERMIL2PSrrY, X86::VPERMIL2PSmrY, 0 },
+ { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYmr, 0 },
{ X86::VPMACSDDrr, X86::VPMACSDDrm, 0 },
{ X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 },
{ X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 },
@@ -2480,9 +2480,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPCMOVrrr, X86::VPCMOVrrm, 0 },
{ X86::VPCMOVYrrr, X86::VPCMOVYrrm, 0 },
{ X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 },
- { X86::VPERMIL2PDrrY, X86::VPERMIL2PDrmY, 0 },
+ { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYrm, 0 },
{ X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 },
- { X86::VPERMIL2PSrrY, X86::VPERMIL2PSrmY, 0 },
+ { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYrm, 0 },
{ X86::VPPERMrrr, X86::VPPERMrrm, 0 },
// AVX-512 instructions with 3 source operands.
Modified: llvm/trunk/lib/Target/X86/X86InstrXOP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrXOP.td?rev=295579&r1=295578&r2=295579&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrXOP.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrXOP.td Sat Feb 18 16:53:43 2017
@@ -292,7 +292,6 @@ let ExeDomain = SSEPackedInt in {
// Instruction where either second or third source can be memory
multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType VT> {
- // 128-bit Instruction
def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
@@ -327,79 +326,51 @@ let ExeDomain = SSEPackedInt in {
defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64>, VEX_L;
}
-multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType vt128, ValueType vt256,
- ValueType id128, ValueType id256,
- PatFrag ld_128, PatFrag ld_256> {
- def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
- (id128 VR128:$src3), (i8 imm:$src4))))]>;
- def rm : IXOP5<opc, MRMSrcMemOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i128mem:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
- (id128 (bitconvert (loadv2i64 addr:$src3))),
- (i8 imm:$src4))))]>,
- VEX_W;
- def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2, VR128:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1),
- (vt128 (bitconvert (ld_128 addr:$src2))),
- (id128 VR128:$src3), (i8 imm:$src4))))]>;
+multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand intmemop, X86MemOperand fpmemop,
+ ValueType VT, PatFrag FPLdFrag,
+ PatFrag IntLdFrag> {
+ def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set RC:$dst,
+ (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>;
+ def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set RC:$dst,
+ (VT (X86vpermil2 RC:$src1, RC:$src2,
+ (bitconvert (IntLdFrag addr:$src3)),
+ (i8 imm:$src4))))]>, VEX_W;
+ def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set RC:$dst,
+ (VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
+ RC:$src3, (i8 imm:$src4))))]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rr_REV : IXOP5<opc, MRMSrcRegOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
+ def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[]>, VEX_W;
-
- def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR256:$dst,
- (vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
- (id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
- def rmY : IXOP5<opc, MRMSrcMemOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, i256mem:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR256:$dst,
- (vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
- (id256 (bitconvert (loadv4i64 addr:$src3))),
- (i8 imm:$src4))))]>, VEX_W, VEX_L;
- def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, VR256:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR256:$dst,
- (vt256 (OpNode (vt256 VR256:$src1),
- (vt256 (bitconvert (ld_256 addr:$src2))),
- (id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
- // For disassembler
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrY_REV : IXOP5<opc, MRMSrcRegOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W, VEX_L;
}
-let ExeDomain = SSEPackedDouble in
- defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", X86vpermil2, v2f64, v4f64,
- v2i64, v4i64, loadv2f64, loadv4f64>;
+let ExeDomain = SSEPackedDouble in {
+ defm VPERMIL2PD : xop_vpermil2<0x49, "vpermil2pd", VR128, i128mem, f128mem,
+ v2f64, loadv2f64, loadv2i64>;
+ defm VPERMIL2PDY : xop_vpermil2<0x49, "vpermil2pd", VR256, i256mem, f256mem,
+ v4f64, loadv4f64, loadv4i64>, VEX_L;
+}
-let ExeDomain = SSEPackedSingle in
- defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", X86vpermil2, v4f32, v8f32,
- v4i32, v8i32, loadv4f32, loadv8f32>;
+let ExeDomain = SSEPackedSingle in {
+ defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
+ v4f32, loadv4f32, loadv2i64>;
+ defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
+ v8f32, loadv8f32, loadv4i64>, VEX_L;
+}
Modified: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MCInstLower.cpp?rev=295579&r1=295578&r2=295579&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp Sat Feb 18 16:53:43 2017
@@ -1607,8 +1607,8 @@ void X86AsmPrinter::EmitInstruction(cons
case X86::VPERMIL2PDrm:
case X86::VPERMIL2PSrm:
- case X86::VPERMIL2PDrmY:
- case X86::VPERMIL2PSrmY: {
+ case X86::VPERMIL2PDYrm:
+ case X86::VPERMIL2PSYrm: {
if (!OutStreamer->isVerboseAsm())
break;
assert(MI->getNumOperands() >= 8 &&
@@ -1621,8 +1621,8 @@ void X86AsmPrinter::EmitInstruction(cons
unsigned ElSize;
switch (MI->getOpcode()) {
default: llvm_unreachable("Invalid opcode");
- case X86::VPERMIL2PSrm: case X86::VPERMIL2PSrmY: ElSize = 32; break;
- case X86::VPERMIL2PDrm: case X86::VPERMIL2PDrmY: ElSize = 64; break;
+ case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
+ case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
}
const MachineOperand &MaskOp = MI->getOperand(6);
More information about the llvm-commits mailing list