[llvm] r323260 - [X86][AVX] LowerBUILD_VECTORAsVariablePermute - add support for VPERMILPV to v2i64/v2f64
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 23 13:33:24 PST 2018
Author: rksimon
Date: Tue Jan 23 13:33:24 2018
New Revision: 323260
URL: http://llvm.org/viewvc/llvm-project?rev=323260&view=rev
Log:
[X86][AVX] LowerBUILD_VECTORAsVariablePermute - add support for VPERMILPV to v2i64/v2f64
Minor refactor to make it possible for LowerBUILD_VECTORAsVariablePermute to be used with a wider variety of shuffle ops and types.
I'd have liked to add v4i32/v4f32 support as well, but we don't see v4i32 index extractions at the moment (which is why I created D42308).
After this I intend to begin adding scaling support for PSHUFB (v8i16, v4i32, v2i64) and VPERMPS (v4f64, v4i64).
Differential Revision: https://reviews.llvm.org/D42431
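For context, this is the kind of source pattern the lowering targets for v2i64: a build_vector whose lanes are variable-index extractelements driven by the lanes of a second (index) vector. The sketch below is reconstructed from the var_shuffle_v2i64 test updated further down; the function and value names are illustrative. With this patch, AVX targets lower it to a single vpermilpd rather than going through the stack, as the test diff shows.

define <2 x i64> @var_shuffle_v2i64_sketch(<2 x i64> %v, <2 x i64> %indices) nounwind {
  ; Pull out the two variable indices.
  %index0 = extractelement <2 x i64> %indices, i32 0
  %index1 = extractelement <2 x i64> %indices, i32 1
  ; Variable-index extracts from the source vector...
  %v0 = extractelement <2 x i64> %v, i64 %index0
  %v1 = extractelement <2 x i64> %v, i64 %index1
  ; ...reassembled into a vector (a build_vector in the DAG).
  %ret0 = insertelement <2 x i64> undef, i64 %v0, i32 0
  %ret1 = insertelement <2 x i64> %ret0, i64 %v1, i32 1
  ret <2 x i64> %ret1
}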
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/var-permute-128.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=323260&r1=323259&r2=323260&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jan 23 13:33:24 2018
@@ -7832,55 +7832,66 @@ static SDValue materializeVectorConstant
// TODO: Handle undefs
// TODO: Utilize pshufb and zero mask blending to support more efficient
// construction of vectors with constant-0 elements.
-// TODO: Use smaller-element vectors of same width, and "interpolate" the indices,
-// when no native operation available.
+// TODO: Use smaller-element vectors of same width, and "interpolate" the
+// indices, when no native operation available.
static SDValue
LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- // Look for VPERMV and PSHUFB opportunities.
- MVT VT = V.getSimpleValueType();
- switch (VT.SimpleTy) {
- default:
- return SDValue();
- case MVT::v16i8:
- if (!Subtarget.hasSSE3())
- return SDValue();
- break;
- case MVT::v8f32:
- case MVT::v8i32:
- if (!Subtarget.hasAVX2())
- return SDValue();
- break;
- case MVT::v4i64:
- case MVT::v4f64:
- if (!Subtarget.hasVLX())
- return SDValue();
- break;
- case MVT::v16f32:
- case MVT::v8f64:
- case MVT::v16i32:
- case MVT::v8i64:
- if (!Subtarget.hasAVX512())
- return SDValue();
- break;
- case MVT::v32i16:
- if (!Subtarget.hasBWI())
- return SDValue();
- break;
- case MVT::v8i16:
- case MVT::v16i16:
- if (!Subtarget.hasVLX() || !Subtarget.hasBWI())
- return SDValue();
- break;
- case MVT::v64i8:
- if (!Subtarget.hasVBMI())
- return SDValue();
- break;
- case MVT::v32i8:
- if (!Subtarget.hasVLX() || !Subtarget.hasVBMI())
- return SDValue();
- break;
- }
+ // Look for VPERMV/VPERMILPV/PSHUFB opportunities.
+ auto LegalPermuteOpcode = [&Subtarget](MVT DstVT, MVT &ShuffleVT) {
+ unsigned Opcode = 0;
+ switch (DstVT.SimpleTy) {
+ default:
+ break;
+ case MVT::v16i8:
+ if (Subtarget.hasSSE3())
+ Opcode = X86ISD::PSHUFB;
+ break;
+ case MVT::v2f64:
+ case MVT::v2i64:
+ if (Subtarget.hasAVX()) {
+ Opcode = X86ISD::VPERMILPV;
+ ShuffleVT = MVT::v2f64;
+ }
+ break;
+ case MVT::v8f32:
+ case MVT::v8i32:
+ if (Subtarget.hasAVX2())
+ Opcode = X86ISD::VPERMV;
+ break;
+ case MVT::v4i64:
+ case MVT::v4f64:
+ if (Subtarget.hasVLX())
+ Opcode = X86ISD::VPERMV;
+ break;
+ case MVT::v16f32:
+ case MVT::v8f64:
+ case MVT::v16i32:
+ case MVT::v8i64:
+ if (Subtarget.hasAVX512())
+ Opcode = X86ISD::VPERMV;
+ break;
+ case MVT::v32i16:
+ if (Subtarget.hasBWI())
+ Opcode = X86ISD::VPERMV;
+ break;
+ case MVT::v8i16:
+ case MVT::v16i16:
+ if (Subtarget.hasVLX() && Subtarget.hasBWI())
+ Opcode = X86ISD::VPERMV;
+ break;
+ case MVT::v64i8:
+ if (Subtarget.hasVBMI())
+ Opcode = X86ISD::VPERMV;
+ break;
+ case MVT::v32i8:
+ if (Subtarget.hasVLX() && Subtarget.hasVBMI())
+ Opcode = X86ISD::VPERMV;
+ break;
+ }
+ return Opcode;
+ };
+
SDValue SrcVec, IndicesVec;
// Check for a match of the permute source vector and permute index elements.
// This is done by checking that the i-th build_vector operand is of the form:
@@ -7918,6 +7929,15 @@ LowerBUILD_VECTORAsVariablePermute(SDVal
return SDValue();
}
+ MVT VT = V.getSimpleValueType();
+ MVT ShuffleVT = VT;
+ unsigned Opcode = LegalPermuteOpcode(VT, ShuffleVT);
+ if (!Opcode)
+ return SDValue();
+ assert(VT.getScalarSizeInBits() == ShuffleVT.getScalarSizeInBits() &&
+ VT.getVectorNumElements() == ShuffleVT.getVectorNumElements() &&
+ "Illegal variable permute shuffle type");
+
unsigned NumElts = VT.getVectorNumElements();
if (IndicesVec.getValueType().getVectorNumElements() < NumElts)
return SDValue();
@@ -7937,9 +7957,12 @@ LowerBUILD_VECTORAsVariablePermute(SDVal
SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec)));
}
- if (VT == MVT::v16i8)
- return DAG.getNode(X86ISD::PSHUFB, SDLoc(V), VT, SrcVec, IndicesVec);
- return DAG.getNode(X86ISD::VPERMV, SDLoc(V), VT, IndicesVec, SrcVec);
+ SrcVec = DAG.getBitcast(ShuffleVT, SrcVec);
+ SDValue Res =
+ Opcode == X86ISD::VPERMV
+ ? DAG.getNode(Opcode, SDLoc(V), ShuffleVT, IndicesVec, SrcVec)
+ : DAG.getNode(Opcode, SDLoc(V), ShuffleVT, SrcVec, IndicesVec);
+ return DAG.getBitcast(VT, Res);
}
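VPERMILPV only exists in floating-point forms, which is why the lambda reports ShuffleVT = v2f64 for both v2f64 and v2i64: integer sources are bitcast to v2f64 before the shuffle and the result is bitcast back, while the operand order still distinguishes VPERMV (indices, source) from VPERMILPV/PSHUFB (source, indices). For the already-floating-point case, a minimal sketch of the input IR (mirroring the var_shuffle_v2f64 test below; names are illustrative) looks like this:

define <2 x double> @var_shuffle_v2f64_sketch(<2 x double> %v, <2 x i64> %indices) nounwind {
  ; Same shape as the v2i64 case, but the data is already in the FP domain,
  ; so the ShuffleVT bitcasts around the VPERMILPV node fold away here.
  %index0 = extractelement <2 x i64> %indices, i32 0
  %index1 = extractelement <2 x i64> %indices, i32 1
  %v0 = extractelement <2 x double> %v, i64 %index0
  %v1 = extractelement <2 x double> %v, i64 %index1
  %ret0 = insertelement <2 x double> undef, double %v0, i32 0
  %ret1 = insertelement <2 x double> %ret0, double %v1, i32 1
  ret <2 x double> %ret1
}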
SDValue
Modified: llvm/trunk/test/CodeGen/X86/var-permute-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/var-permute-128.ll?rev=323260&r1=323259&r2=323260&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/var-permute-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/var-permute-128.ll Tue Jan 23 13:33:24 2018
@@ -23,14 +23,7 @@ define <2 x i64> @var_shuffle_v2i64(<2 x
;
; AVX-LABEL: var_shuffle_v2i64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovq %xmm1, %rax
-; AVX-NEXT: andl $1, %eax
-; AVX-NEXT: vpextrq $1, %xmm1, %rcx
-; AVX-NEXT: andl $1, %ecx
-; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%index0 = extractelement <2 x i64> %indices, i32 0
%index1 = extractelement <2 x i64> %indices, i32 1
@@ -280,13 +273,7 @@ define <2 x double> @var_shuffle_v2f64(<
;
; AVX-LABEL: var_shuffle_v2f64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovq %xmm1, %rax
-; AVX-NEXT: andl $1, %eax
-; AVX-NEXT: vpextrq $1, %xmm1, %rcx
-; AVX-NEXT: andl $1, %ecx
-; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%index0 = extractelement <2 x i64> %indices, i32 0
%index1 = extractelement <2 x i64> %indices, i32 1