[llvm] r327054 - [X86][AVX] Pull out variable permute creation from LowerBUILD_VECTORAsVariablePermute. NFCI.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 8 12:07:06 PST 2018
Author: rksimon
Date: Thu Mar 8 12:07:06 2018
New Revision: 327054
URL: http://llvm.org/viewvc/llvm-project?rev=327054&view=rev
Log:
[X86][AVX] Pull out variable permute creation from LowerBUILD_VECTORAsVariablePermute. NFCI.
This will make it easier to handle more complex cases than basic scaling or index masks.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=327054&r1=327053&r2=327054&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Mar 8 12:07:06 2018
@@ -7908,6 +7908,144 @@ static SDValue materializeVectorConstant
return SDValue();
}
+/// Build a VPERMV/VPERMILPV/PSHUFB variable permute of SrcVec by IndicesVec for
+/// result type VT; both vectors may be resized/bitcast to suit the permute op.
+/// Returns an empty SDValue if no opcode is selected or the operands can't fit.
+SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
+ SDLoc &DL, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ unsigned Opcode = 0;
+ MVT ShuffleVT = VT;
+
+ switch (VT.SimpleTy) {
+ default:
+ break;
+ case MVT::v16i8:
+ if (Subtarget.hasSSE3()) // NOTE(review): PSHUFB is an SSSE3 instruction; confirm hasSSE3() is the intended check (same guard in the v8i16/v4i32 cases).
+ Opcode = X86ISD::PSHUFB;
+ break;
+ case MVT::v8i16:
+ if (Subtarget.hasVLX() && Subtarget.hasBWI())
+ Opcode = X86ISD::VPERMV;
+ else if (Subtarget.hasSSE3()) {
+ Opcode = X86ISD::PSHUFB;
+ ShuffleVT = MVT::v16i8;
+ }
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ if (Subtarget.hasAVX()) {
+ Opcode = X86ISD::VPERMILPV;
+ ShuffleVT = MVT::v4f32;
+ } else if (Subtarget.hasSSE3()) {
+ Opcode = X86ISD::PSHUFB;
+ ShuffleVT = MVT::v16i8;
+ }
+ break;
+ case MVT::v2f64:
+ case MVT::v2i64:
+ if (Subtarget.hasAVX()) {
+ Opcode = X86ISD::VPERMILPV;
+ ShuffleVT = MVT::v2f64;
+ }
+ break;
+ case MVT::v32i8:
+ if (Subtarget.hasVLX() && Subtarget.hasVBMI())
+ Opcode = X86ISD::VPERMV;
+ break;
+ case MVT::v16i16:
+ if (Subtarget.hasVLX() && Subtarget.hasBWI())
+ Opcode = X86ISD::VPERMV;
+ break;
+ case MVT::v8f32:
+ case MVT::v8i32:
+ if (Subtarget.hasAVX2())
+ Opcode = X86ISD::VPERMV;
+ break;
+ case MVT::v4i64:
+ case MVT::v4f64:
+ if (Subtarget.hasVLX())
+ Opcode = X86ISD::VPERMV;
+ else if (Subtarget.hasAVX2()) {
+ Opcode = X86ISD::VPERMV;
+ ShuffleVT = MVT::v8f32;
+ }
+ break;
+ case MVT::v64i8:
+ if (Subtarget.hasVBMI())
+ Opcode = X86ISD::VPERMV;
+ break;
+ case MVT::v32i16:
+ if (Subtarget.hasBWI())
+ Opcode = X86ISD::VPERMV;
+ break;
+ case MVT::v16f32:
+ case MVT::v16i32:
+ case MVT::v8f64:
+ case MVT::v8i64:
+ if (Subtarget.hasAVX512())
+ Opcode = X86ISD::VPERMV;
+ break;
+ }
+ if (!Opcode) // No permute op was chosen for this type on this subtarget.
+ return SDValue();
+
+ assert((VT.getSizeInBits() == ShuffleVT.getSizeInBits()) &&
+ (VT.getScalarSizeInBits() % ShuffleVT.getScalarSizeInBits()) == 0 &&
+ "Illegal variable permute shuffle type");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ if (IndicesVec.getValueType().getVectorNumElements() < NumElts) // Too few indices to fill the result: give up.
+ return SDValue();
+ else if (IndicesVec.getValueType().getVectorNumElements() > NumElts) // More indices than needed: take a subvector from element 0 (width argument equals VT's bit size).
+ IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec),
+ NumElts * VT.getScalarSizeInBits());
+
+ MVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
+ IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT); // Zero-extend or truncate the indices to VT's integer-element equivalent.
+
+ if (SrcVec.getValueSizeInBits() > VT.getSizeInBits()) // Source is wider than the result type: give up.
+ return SDValue();
+ else if (SrcVec.getValueSizeInBits() < VT.getSizeInBits()) { // Source is narrower: insert it at element 0 of an undef VT vector.
+ SrcVec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(SrcVec), VT, DAG.getUNDEF(VT),
+ SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec)));
+ }
+
+ uint64_t Scale = VT.getScalarSizeInBits() / ShuffleVT.getScalarSizeInBits(); // Ratio of VT's element width to the permute op's element width.
+ if (Scale > 1) {
+ assert(isPowerOf2_64(Scale) && "Illegal variable permute shuffle scale");
+ unsigned ShuffleBits = ShuffleVT.getScalarSizeInBits();
+ uint64_t IndexScale = 0;
+ uint64_t IndexOffset = 0;
+
+ // If we're scaling a smaller permute op, then we need to repeat the indices,
+ // scaling and offsetting them as well. e.g. v4i32 -> v16i8 (Scale = 4):
+ //   IndexScale  = v4i32 Splat(4 << 24 | 4 << 16 | 4 << 8 | 4)
+ //   IndexOffset = v4i32 Splat(3 << 24 | 2 << 16 | 1 << 8 | 0)
+ for (uint64_t i = 0; i != Scale; ++i) {
+ IndexScale |= Scale << (i * ShuffleBits);
+ IndexOffset |= i << (i * ShuffleBits);
+ }
+
+ IndicesVec =
+ DAG.getNode(ISD::MUL, SDLoc(IndicesVec), IndicesVT, IndicesVec,
+ DAG.getConstant(IndexScale, SDLoc(IndicesVec), IndicesVT));
+ IndicesVec =
+ DAG.getNode(ISD::ADD, SDLoc(IndicesVec), IndicesVT, IndicesVec,
+ DAG.getConstant(IndexOffset, SDLoc(IndicesVec), IndicesVT));
+ }
+
+ EVT ShuffleIdxVT = EVT(ShuffleVT).changeVectorElementTypeToInteger();
+ IndicesVec = DAG.getBitcast(ShuffleIdxVT, IndicesVec);
+
+ SrcVec = DAG.getBitcast(ShuffleVT, SrcVec);
+ SDValue Res = Opcode == X86ISD::VPERMV // VPERMV is built as (indices, source); the other opcodes as (source, indices).
+ ? DAG.getNode(Opcode, DL, ShuffleVT, IndicesVec, SrcVec)
+ : DAG.getNode(Opcode, DL, ShuffleVT, SrcVec, IndicesVec);
+ return DAG.getBitcast(VT, Res); // Cast the permute result from ShuffleVT back to VT.
+}
+
// Tries to lower a BUILD_VECTOR composed of extract-extract chains that can be
// reasoned to be a permutation of a vector by indices in a non-constant vector.
// (build_vector (extract_elt V, (extract_elt I, 0)),
@@ -7922,82 +8060,6 @@ static SDValue materializeVectorConstant
static SDValue
LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- // Look for VPERMV/VPERMILPV/PSHUFB opportunities.
- auto LegalPermuteOpcode = [&Subtarget](MVT DstVT, MVT &ShuffleVT) {
- unsigned Opcode = 0;
- switch (DstVT.SimpleTy) {
- default:
- break;
- case MVT::v16i8:
- if (Subtarget.hasSSE3())
- Opcode = X86ISD::PSHUFB;
- break;
- case MVT::v8i16:
- if (Subtarget.hasVLX() && Subtarget.hasBWI())
- Opcode = X86ISD::VPERMV;
- else if (Subtarget.hasSSE3()) {
- Opcode = X86ISD::PSHUFB;
- ShuffleVT = MVT::v16i8;
- }
- break;
- case MVT::v4f32:
- case MVT::v4i32:
- if (Subtarget.hasAVX()) {
- Opcode = X86ISD::VPERMILPV;
- ShuffleVT = MVT::v4f32;
- } else if (Subtarget.hasSSE3()) {
- Opcode = X86ISD::PSHUFB;
- ShuffleVT = MVT::v16i8;
- }
- break;
- case MVT::v2f64:
- case MVT::v2i64:
- if (Subtarget.hasAVX()) {
- Opcode = X86ISD::VPERMILPV;
- ShuffleVT = MVT::v2f64;
- }
- break;
- case MVT::v32i8:
- if (Subtarget.hasVLX() && Subtarget.hasVBMI())
- Opcode = X86ISD::VPERMV;
- break;
- case MVT::v16i16:
- if (Subtarget.hasVLX() && Subtarget.hasBWI())
- Opcode = X86ISD::VPERMV;
- break;
- case MVT::v8f32:
- case MVT::v8i32:
- if (Subtarget.hasAVX2())
- Opcode = X86ISD::VPERMV;
- break;
- case MVT::v4i64:
- case MVT::v4f64:
- if (Subtarget.hasVLX())
- Opcode = X86ISD::VPERMV;
- else if (Subtarget.hasAVX2()) {
- Opcode = X86ISD::VPERMV;
- ShuffleVT = MVT::v8f32;
- }
- break;
- case MVT::v64i8:
- if (Subtarget.hasVBMI())
- Opcode = X86ISD::VPERMV;
- break;
- case MVT::v32i16:
- if (Subtarget.hasBWI())
- Opcode = X86ISD::VPERMV;
- break;
- case MVT::v16f32:
- case MVT::v16i32:
- case MVT::v8f64:
- case MVT::v8i64:
- if (Subtarget.hasAVX512())
- Opcode = X86ISD::VPERMV;
- break;
- }
- return Opcode;
- };
-
SDValue SrcVec, IndicesVec;
// Check for a match of the permute source vector and permute index elements.
// This is done by checking that the i-th build_vector operand is of the form:
@@ -8035,67 +8097,9 @@ LowerBUILD_VECTORAsVariablePermute(SDVal
return SDValue();
}
+ SDLoc DL(V);
MVT VT = V.getSimpleValueType();
- MVT ShuffleVT = VT;
- unsigned Opcode = LegalPermuteOpcode(VT, ShuffleVT);
- if (!Opcode)
- return SDValue();
- assert((VT.getSizeInBits() == ShuffleVT.getSizeInBits()) &&
- (VT.getScalarSizeInBits() % ShuffleVT.getScalarSizeInBits()) == 0 &&
- "Illegal variable permute shuffle type");
-
- unsigned NumElts = VT.getVectorNumElements();
- if (IndicesVec.getValueType().getVectorNumElements() < NumElts)
- return SDValue();
- else if (IndicesVec.getValueType().getVectorNumElements() > NumElts) {
- IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec),
- NumElts * VT.getScalarSizeInBits());
- }
-
- MVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
- IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
-
- if (SrcVec.getValueSizeInBits() > VT.getSizeInBits())
- return SDValue();
- else if (SrcVec.getValueSizeInBits() < VT.getSizeInBits()) {
- SrcVec =
- DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(SrcVec), VT, DAG.getUNDEF(VT),
- SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec)));
- }
-
- uint64_t Scale = VT.getScalarSizeInBits() / ShuffleVT.getScalarSizeInBits();
- if (Scale > 1) {
- assert(isPowerOf2_64(Scale) && "Illegal variable permute shuffle scale");
- unsigned ShuffleBits = ShuffleVT.getScalarSizeInBits();
- uint64_t IndexScale = 0;
- uint64_t IndexOffset = 0;
-
- // If we're scaling a smaller permute op, then we need to repeat the indices,
- // scaling and offsetting them as well.
- // e.g. v4i32 -> v16i8 (Scale = 4)
- // IndexScale = v4i32 Splat(4 << 24 | 4 << 16 | 4 << 8 | 4)
- // indexOffset = v4i32 Splat(3 << 24 | 2 << 16 | 1 << 8 | 0)
- for (uint64_t i = 0; i != Scale; ++i) {
- IndexScale |= Scale << (i * ShuffleBits);
- IndexOffset |= i << (i * ShuffleBits);
- }
-
- SDLoc DL(IndicesVec);
- IndicesVec = DAG.getNode(ISD::MUL, DL, IndicesVT, IndicesVec,
- DAG.getConstant(IndexScale, DL, IndicesVT));
- IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec,
- DAG.getConstant(IndexOffset, DL, IndicesVT));
- }
-
- EVT ShuffleIdxVT = EVT(ShuffleVT).changeVectorElementTypeToInteger();
- IndicesVec = DAG.getBitcast(ShuffleIdxVT, IndicesVec);
-
- SrcVec = DAG.getBitcast(ShuffleVT, SrcVec);
- SDValue Res =
- Opcode == X86ISD::VPERMV
- ? DAG.getNode(Opcode, SDLoc(V), ShuffleVT, IndicesVec, SrcVec)
- : DAG.getNode(Opcode, SDLoc(V), ShuffleVT, SrcVec, IndicesVec);
- return DAG.getBitcast(VT, Res);
+ return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
}
SDValue
More information about the llvm-commits
mailing list