[llvm] r272468 - [X86][SSE] Use vXi8 return type for PSLLDQ/PSRLDQ instructions
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 11 05:54:38 PDT 2016
Author: rksimon
Date: Sat Jun 11 07:54:37 2016
New Revision: 272468
URL: http://llvm.org/viewvc/llvm-project?rev=272468&view=rev
Log:
[X86][SSE] Use vXi8 return type for PSLLDQ/PSRLDQ instructions
These are byte shift instructions, and it will make shuffle combining a lot more straightforward if we can assume a vXi8 vector of bytes, so that decoded shuffle masks match the return type's number of elements.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=272468&r1=272467&r2=272468&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jun 11 07:54:37 2016
@@ -5444,7 +5444,7 @@ static SDValue getVShift(bool isLeft, EV
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, SDLoc dl) {
assert(VT.is128BitVector() && "Unknown type for VShift");
- MVT ShVT = MVT::v2i64;
+ MVT ShVT = MVT::v16i8;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getBitcast(ShVT, SrcOp);
MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT);
@@ -7731,16 +7731,16 @@ static SDValue lowerVectorShuffleAsByteR
int LoByteShift = 16 - Rotation * Scale;
int HiByteShift = Rotation * Scale;
- // Cast the inputs to v2i64 to match PSLLDQ/PSRLDQ.
- Lo = DAG.getBitcast(MVT::v2i64, Lo);
- Hi = DAG.getBitcast(MVT::v2i64, Hi);
+ // Cast the inputs to v16i8 to match PSLLDQ/PSRLDQ.
+ Lo = DAG.getBitcast(MVT::v16i8, Lo);
+ Hi = DAG.getBitcast(MVT::v16i8, Hi);
- SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo,
+ SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
DAG.getConstant(LoByteShift, DL, MVT::i8));
- SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi,
+ SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
DAG.getConstant(HiByteShift, DL, MVT::i8));
return DAG.getBitcast(VT,
- DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift));
+ DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
}
/// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
@@ -7806,7 +7806,8 @@ static SDValue lowerVectorShuffleAsShift
// We need to round trip through the appropriate type for the shift.
MVT ShiftSVT = MVT::getIntegerVT(VT.getScalarSizeInBits() * Scale);
- MVT ShiftVT = MVT::getVectorVT(ShiftSVT, Size / Scale);
+ MVT ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8)
+ : MVT::getVectorVT(ShiftSVT, Size / Scale);
assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
"Illegal integer vector type");
V = DAG.getBitcast(ShiftVT, V);
@@ -8382,9 +8383,9 @@ static SDValue lowerVectorShuffleAsEleme
V2Shuffle[V2Index] = 0;
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
} else {
- V2 = DAG.getBitcast(MVT::v2i64, V2);
+ V2 = DAG.getBitcast(MVT::v16i8, V2);
V2 = DAG.getNode(
- X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
+ X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL,
DAG.getTargetLoweringInfo().getScalarShiftAmountTy(
DAG.getDataLayout(), VT)));
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=272468&r1=272467&r2=272468&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sat Jun 11 07:54:37 2016
@@ -7561,19 +7561,20 @@ multiclass avx512_shift_packed<bits<8> o
(outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode
- (_.LdFrag addr:$src1), (i8 imm:$src2))))]>;
+ (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i8 imm:$src2))))]>;
}
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
Format MRMm, string OpcodeStr, Predicate prd>{
let Predicates = [prd] in
defm Z512 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
- OpcodeStr, v8i64_info>, EVEX_V512;
+ OpcodeStr, v64i8_info>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
- OpcodeStr, v4i64x_info>, EVEX_V256;
+ OpcodeStr, v32i8x_info>, EVEX_V256;
defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
- OpcodeStr, v2i64x_info>, EVEX_V128;
+ OpcodeStr, v16i8x_info>, EVEX_V128;
}
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=272468&r1=272467&r2=272468&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sat Jun 11 07:54:37 2016
@@ -4081,13 +4081,13 @@ let ExeDomain = SSEPackedInt, SchedRW =
(outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
"vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v2i64 (X86vshldq VR128:$src1, (i8 imm:$src2))))]>,
+ (v16i8 (X86vshldq VR128:$src1, (i8 imm:$src2))))]>,
VEX_4V;
def VPSRLDQri : PDIi8<0x73, MRM3r,
(outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
"vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>,
+ (v16i8 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>,
VEX_4V;
// PSRADQri doesn't exist in SSE[1-3].
} // Predicates = [HasAVX, NoVLX_Or_NoBWI]
@@ -4131,13 +4131,13 @@ let ExeDomain = SSEPackedInt, SchedRW =
(outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
"vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR256:$dst,
- (v4i64 (X86vshldq VR256:$src1, (i8 imm:$src2))))]>,
+ (v32i8 (X86vshldq VR256:$src1, (i8 imm:$src2))))]>,
VEX_4V, VEX_L;
def VPSRLDQYri : PDIi8<0x73, MRM3r,
(outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
"vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR256:$dst,
- (v4i64 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>,
+ (v32i8 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>,
VEX_4V, VEX_L;
// PSRADQYri doesn't exist in SSE[1-3].
} // Predicates = [HasAVX2, NoVLX_Or_NoBWI]
@@ -4176,13 +4176,13 @@ let ExeDomain = SSEPackedInt, SchedRW =
(outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
"pslldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (X86vshldq VR128:$src1, (i8 imm:$src2))))],
+ (v16i8 (X86vshldq VR128:$src1, (i8 imm:$src2))))],
IIC_SSE_INTSHDQ_P_RI>;
def PSRLDQri : PDIi8<0x73, MRM3r,
(outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
"psrldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))],
+ (v16i8 (X86vshrdq VR128:$src1, (i8 imm:$src2))))],
IIC_SSE_INTSHDQ_P_RI>;
// PSRADQri doesn't exist in SSE[1-3].
}
More information about the llvm-commits mailing list