[llvm] r293448 - [AVX-512] Don't reuse VSHLI/VSRLI for mask register shifts. VSHLI/VSHRI shift within elements while KSHIFT moves whole elements.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 29 16:06:02 PST 2017
Author: ctopper
Date: Sun Jan 29 18:06:01 2017
New Revision: 293448
URL: http://llvm.org/viewvc/llvm-project?rev=293448&view=rev
Log:
[AVX-512] Don't reuse VSHLI/VSRLI for mask register shifts. VSHLI/VSHRI shift within elements while KSHIFT moves whole elements.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=293448&r1=293447&r2=293448&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Jan 29 18:06:01 2017
@@ -5021,7 +5021,8 @@ static SDValue insert1BitVector(SDValue
if (Vec.isUndef()) {
if (IdxVal != 0) {
SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8);
- WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, WideSubVec, ShiftBits);
+ WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
+ ShiftBits);
}
return ExtractSubVec(WideSubVec);
}
@@ -5030,9 +5031,9 @@ static SDValue insert1BitVector(SDValue
NumElems = WideOpVT.getVectorNumElements();
unsigned ShiftLeft = NumElems - SubVecNumElems;
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
- Vec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, WideSubVec,
- DAG.getConstant(ShiftLeft, dl, MVT::i8));
- Vec = ShiftRight ? DAG.getNode(X86ISD::VSRLI, dl, WideOpVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
+ DAG.getConstant(ShiftLeft, dl, MVT::i8));
+ Vec = ShiftRight ? DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
DAG.getConstant(ShiftRight, dl, MVT::i8)) : Vec;
return ExtractSubVec(Vec);
}
@@ -5041,8 +5042,8 @@ static SDValue insert1BitVector(SDValue
// Zero lower bits of the Vec
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
- Vec = DAG.getNode(X86ISD::VSRLI, dl, WideOpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
// Merge them together, SubVec should be zero extended.
WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
getZeroVector(WideOpVT, Subtarget, DAG, dl),
@@ -5054,12 +5055,12 @@ static SDValue insert1BitVector(SDValue
// Simple case when we put subvector in the upper part
if (IdxVal + SubVecNumElems == NumElems) {
// Zero upper bits of the Vec
- WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, WideSubVec,
+ WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
- Vec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::VSRLI, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
return ExtractSubVec(Vec);
}
@@ -13658,9 +13659,9 @@ X86TargetLowering::ExtractBitFromMaskVec
}
unsigned MaxSift = VecVT.getVectorNumElements() - 1;
if (MaxSift - IdxVal)
- Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
- Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(MaxSift, dl, MVT::i8));
return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec,
DAG.getIntPtrConstant(0, dl));
@@ -13802,7 +13803,7 @@ X86TargetLowering::InsertBitToMaskVector
if(Vec.isUndef()) {
if (IdxVal)
- EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+ EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
return EltInVec;
}
@@ -13812,21 +13813,21 @@ X86TargetLowering::InsertBitToMaskVector
if (IdxVal == 0 ) {
// EltInVec already at correct index and other bits are 0.
// Clean the first bit in source vector.
- Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1 , dl, MVT::i8));
- Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
if (IdxVal == NumElems -1) {
// Move the bit to the last position inside the vector.
- EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+ EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
// Clean the last bit in the source vector.
- Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
- Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1 , dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
@@ -23945,6 +23946,8 @@ const char *X86TargetLowering::getTarget
case X86ISD::TESTNM: return "X86ISD::TESTNM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
case X86ISD::KTEST: return "X86ISD::KTEST";
+ case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL";
+ case X86ISD::KSHIFTR: return "X86ISD::KSHIFTR";
case X86ISD::PACKSS: return "X86ISD::PACKSS";
case X86ISD::PACKUS: return "X86ISD::PACKUS";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
@@ -30494,10 +30497,7 @@ static SDValue combineVectorShift(SDNode
bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode;
EVT VT = N->getValueType(0);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
-
- // This fails for mask register (vXi1) shifts.
- if ((NumBitsPerElt % 8) != 0)
- return SDValue();
+ assert((NumBitsPerElt % 8) == 0);
// Out of range logical bit shifts are guaranteed to be zero.
// Out of range arithmetic bit shifts splat the sign bit.
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=293448&r1=293447&r2=293448&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sun Jan 29 18:06:01 2017
@@ -320,6 +320,9 @@ namespace llvm {
// Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,
+ // Shifts of mask registers.
+ KSHIFTL, KSHIFTR,
+
// Bit rotate by immediate
VROTLI, VROTRI,
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=293448&r1=293447&r2=293448&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Jan 29 18:06:01 2017
@@ -2536,8 +2536,8 @@ multiclass avx512_mask_shiftop_w<bits<8>
}
}
-defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
-defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
+defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
+defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
@@ -2618,12 +2618,12 @@ def : Pat<(v32i1 (extract_subvector (v64
// Patterns for kmask shift
multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
- def : Pat<(VT (X86vshli RC:$src, (i8 imm:$imm))),
+ def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
RC))>;
- def : Pat<(VT (X86vsrli RC:$src, (i8 imm:$imm))),
+ def : Pat<(VT (X86kshiftr RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=293448&r1=293447&r2=293448&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sun Jan 29 18:06:01 2017
@@ -200,6 +200,15 @@ def X86vshli : SDNode<"X86ISD::VSHLI",
def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
+def X86kshiftl : SDNode<"X86ISD::KSHIFTL",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i8>]>>;
+def X86kshiftr : SDNode<"X86ISD::KSHIFTR",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i8>]>>;
+
def X86vrotli : SDNode<"X86ISD::VROTLI", SDTIntShiftOp>;
def X86vrotri : SDNode<"X86ISD::VROTRI", SDTIntShiftOp>;
More information about the llvm-commits
mailing list