[llvm] r340307 - [X86][SSE] Lower vXi8 general shifts to SSE shifts directly. NFCI.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 21 10:27:03 PDT 2018
Author: rksimon
Date: Tue Aug 21 10:27:03 2018
New Revision: 340307
URL: http://llvm.org/viewvc/llvm-project?rev=340307&view=rev
Log:
[X86][SSE] Lower vXi8 general shifts to SSE shifts directly. NFCI.
Most of these shifts are extended to vXi16 so we don't gain anything from forcing another round of generic shift lowering - we know these extended cases are legal constant splat shifts.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=340307&r1=340306&r2=340307&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Aug 21 10:27:03 2018
@@ -23844,7 +23844,6 @@ static SDValue LowerShift(SDValue Op, co
(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
(VT == MVT::v64i8 && Subtarget.hasBWI())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
- unsigned ShiftOpcode = Opc;
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (VT.is512BitVector()) {
@@ -23878,32 +23877,33 @@ static SDValue LowerShift(SDValue Op, co
// We can safely do this using i16 shifts as we're only interested in
// the 3 lower bits of each byte.
Amt = DAG.getBitcast(ExtVT, Amt);
- Amt = DAG.getNode(ISD::SHL, dl, ExtVT, Amt, DAG.getConstant(5, dl, ExtVT));
+ Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, Amt, 5, DAG);
Amt = DAG.getBitcast(VT, Amt);
if (Opc == ISD::SHL || Opc == ISD::SRL) {
// r = VSELECT(r, shift(r, 4), a);
- SDValue M =
- DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
+ SDValue M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(4, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
+ M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(2, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
+ M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(1, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
return R;
}
if (Opc == ISD::SRA) {
+ unsigned X86Opc = getTargetVShiftUniformOpcode(Opc, false);
+
// For SRA we need to unpack each byte to the higher byte of a i16 vector
// so we can correctly sign extend. We don't care what happens to the
// lower byte.
@@ -23917,10 +23917,8 @@ static SDValue LowerShift(SDValue Op, co
RHi = DAG.getBitcast(ExtVT, RHi);
// r = VSELECT(r, shift(r, 4), a);
- SDValue MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
- DAG.getConstant(4, dl, ExtVT));
- SDValue MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
- DAG.getConstant(4, dl, ExtVT));
+ SDValue MLo = getTargetVShiftByConstNode(X86Opc, dl, ExtVT, RLo, 4, DAG);
+ SDValue MHi = getTargetVShiftByConstNode(X86Opc, dl, ExtVT, RHi, 4, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
@@ -23929,10 +23927,8 @@ static SDValue LowerShift(SDValue Op, co
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 2), a);
- MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
- DAG.getConstant(2, dl, ExtVT));
- MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
- DAG.getConstant(2, dl, ExtVT));
+ MLo = getTargetVShiftByConstNode(X86Opc, dl, ExtVT, RLo, 2, DAG);
+ MHi = getTargetVShiftByConstNode(X86Opc, dl, ExtVT, RHi, 2, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
@@ -23941,20 +23937,15 @@ static SDValue LowerShift(SDValue Op, co
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 1), a);
- MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
- DAG.getConstant(1, dl, ExtVT));
- MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
- DAG.getConstant(1, dl, ExtVT));
+ MLo = getTargetVShiftByConstNode(X86Opc, dl, ExtVT, RLo, 1, DAG);
+ MHi = getTargetVShiftByConstNode(X86Opc, dl, ExtVT, RHi, 1, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// Logical shift the result back to the lower byte, leaving a zero upper
- // byte
- // meaning that we can safely pack with PACKUSWB.
- RLo =
- DAG.getNode(ISD::SRL, dl, ExtVT, RLo, DAG.getConstant(8, dl, ExtVT));
- RHi =
- DAG.getNode(ISD::SRL, dl, ExtVT, RHi, DAG.getConstant(8, dl, ExtVT));
+ // byte meaning that we can safely pack with PACKUSWB.
+ RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RLo, 8, DAG);
+ RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RHi, 8, DAG);
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
}
@@ -23972,8 +23963,8 @@ static SDValue LowerShift(SDValue Op, co
RHi = DAG.getBitcast(ExtVT, RHi);
SDValue Lo = DAG.getNode(Opc, dl, ExtVT, RLo, ALo);
SDValue Hi = DAG.getNode(Opc, dl, ExtVT, RHi, AHi);
- Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT));
- Hi = DAG.getNode(ISD::SRL, dl, ExtVT, Hi, DAG.getConstant(16, dl, ExtVT));
+ Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Lo, 16, DAG);
+ Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Hi, 16, DAG);
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
More information about the llvm-commits
mailing list