[llvm] r321576 - [X86] Custom legalize vXi1 extract_subvector with KSHIFTR.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 29 22:45:43 PST 2017
Author: ctopper
Date: Fri Dec 29 22:45:43 2017
New Revision: 321576
URL: http://llvm.org/viewvc/llvm-project?rev=321576&view=rev
Log:
[X86] Custom legalize vXi1 extract_subvector with KSHIFTR.
This allows us to remove some isel patterns.
This is mostly NFC, but we now use KSHIFTB instead of KSHIFTW with DQI.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
llvm/trunk/test/CodeGen/X86/pr33349.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=321576&r1=321575&r2=321576&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Dec 29 22:45:43 2017
@@ -1186,9 +1186,8 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
- for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1,
- MVT::v16i1, MVT::v32i1, MVT::v64i1 })
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
+ for (auto VT : { MVT::v1i1, MVT::v8i1 })
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
@@ -1428,6 +1427,8 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
+ for (auto VT : { MVT::v16i1, MVT::v32i1 })
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Extends from v32i1 masks to 256-bit vectors.
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
@@ -1540,6 +1541,8 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
+ for (auto VT : { MVT::v2i1, MVT::v4i1 })
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Extends from v2i1/v4i1 masks to 128-bit vectors.
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
@@ -15070,6 +15073,42 @@ static SDValue LowerINSERT_SUBVECTOR(SDV
return insert1BitVector(Op, DAG, Subtarget);
}
+static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
+ "Only vXi1 extract_subvectors need custom lowering");
+ // Lowers a vXi1 EXTRACT_SUBVECTOR as a KSHIFTR by the index plus an extract at 0.
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0); // vector being extracted from
+ SDValue Idx = Op.getOperand(1); // index of the first extracted element
+ // Only constant indices are handled; otherwise an empty SDValue is returned.
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0) // the operation is legal; hand the node back unchanged
+ return Op;
+
+ MVT VecVT = Vec.getSimpleValueType();
+ unsigned NumElems = VecVT.getVectorNumElements();
+ // Extend to natively supported kshift: sources narrower than v8i1, and v8i1
+ // without DQI, go in the low elements of an undef v8i1 (DQI) or v16i1 vector.
+ MVT WideVecVT = VecVT;
+ if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
+ WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
+ DAG.getUNDEF(WideVecVT), Vec,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ // Shift to the LSB: source element IdxVal ends up at element 0.
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ // Take the requested result type from element 0 of the shifted vector.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec,
+ DAG.getIntPtrConstant(0, dl));
+}
+
// Returns the appropriate wrapper opcode for a global reference.
unsigned X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV) const {
// References to absolute symbols are never PC-relative.
@@ -24595,6 +24634,7 @@ SDValue X86TargetLowering::LowerOperatio
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=321576&r1=321575&r2=321576&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri Dec 29 22:45:43 2017
@@ -3087,49 +3087,6 @@ defm : operation_subvector_mask_lowering
defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
-
-multiclass vextract_for_mask_to_mask<string InstrStr, X86KVectorVTInfo From,
- X86KVectorVTInfo To, Predicate prd> {
-let Predicates = [prd] in
- def :
- Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))),
- (To.KVT(COPY_TO_REGCLASS
- (!cast<Instruction>(InstrStr#"ri") From.KVT:$src,
- (i8 imm:$imm8)), To.KRC))>;
-}
-
-multiclass vextract_for_mask_to_mask_legal_w<X86KVectorVTInfo From,
- X86KVectorVTInfo To> {
-def :
- Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))),
- (To.KVT(COPY_TO_REGCLASS
- (KSHIFTRWri(COPY_TO_REGCLASS From.KRC:$src, VK16),
- (i8 imm:$imm8)), To.KRC))>;
-}
-
-defm : vextract_for_mask_to_mask_legal_w<v2i1_info, v1i1_info>;
-defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v1i1_info>;
-defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v1i1_info>;
-defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v2i1_info>;
-defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v2i1_info>;
-defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v4i1_info>;
-
-defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v1i1_info, HasAVX512>;
-defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v1i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v1i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v2i1_info, HasAVX512>;
-defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v2i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v2i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v4i1_info, HasAVX512>;
-defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v4i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v4i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v8i1_info, HasAVX512>;
-defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v8i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v8i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v16i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v16i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v32i1_info, HasBWI>;
-
// Patterns for kmask shift
multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))),
Modified: llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll?rev=321576&r1=321575&r2=321576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-extract-subvector-load-store.ll Fri Dec 29 22:45:43 2017
@@ -6,7 +6,7 @@ define void @load_v8i1_broadcast_4_v2i1(
; AVX512-LABEL: load_v8i1_broadcast_4_v2i1:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
-; AVX512-NEXT: kshiftrw $4, %k0, %k0
+; AVX512-NEXT: kshiftrb $4, %k0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm2
; AVX512-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512-NEXT: vpmovq2m %xmm2, %k1
@@ -37,7 +37,7 @@ define void @load_v8i1_broadcast_7_v2i1(
; AVX512-LABEL: load_v8i1_broadcast_7_v2i1:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
-; AVX512-NEXT: kshiftrw $6, %k0, %k0
+; AVX512-NEXT: kshiftrb $6, %k0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm2
; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512-NEXT: vpmovq2m %xmm2, %k1
@@ -624,7 +624,7 @@ define void @load_v2i1_broadcast_1_v1i1_
; AVX512-LABEL: load_v2i1_broadcast_1_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
-; AVX512-NEXT: kshiftrw $1, %k0, %k0
+; AVX512-NEXT: kshiftrb $1, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
@@ -645,7 +645,7 @@ define void @load_v3i1_broadcast_1_v1i1_
; AVX512-LABEL: load_v3i1_broadcast_1_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
-; AVX512-NEXT: kshiftrw $1, %k0, %k0
+; AVX512-NEXT: kshiftrb $1, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
@@ -666,7 +666,7 @@ define void @load_v3i1_broadcast_2_v1i1_
; AVX512-LABEL: load_v3i1_broadcast_2_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
-; AVX512-NEXT: kshiftrw $2, %k0, %k0
+; AVX512-NEXT: kshiftrb $2, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
@@ -687,7 +687,7 @@ define void @load_v4i1_broadcast_2_v1i1_
; AVX512-LABEL: load_v4i1_broadcast_2_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
-; AVX512-NEXT: kshiftrw $2, %k0, %k0
+; AVX512-NEXT: kshiftrb $2, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
@@ -708,7 +708,7 @@ define void @load_v4i1_broadcast_3_v1i1_
; AVX512-LABEL: load_v4i1_broadcast_3_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
-; AVX512-NEXT: kshiftrw $3, %k0, %k0
+; AVX512-NEXT: kshiftrb $3, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
@@ -729,7 +729,7 @@ define void @load_v8i1_broadcast_4_v1i1_
; AVX512-LABEL: load_v8i1_broadcast_4_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
-; AVX512-NEXT: kshiftrw $4, %k0, %k0
+; AVX512-NEXT: kshiftrb $4, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
@@ -750,7 +750,7 @@ define void @load_v8i1_broadcast_4_v2i1_
; AVX512-LABEL: load_v8i1_broadcast_4_v2i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
-; AVX512-NEXT: kshiftrw $4, %k0, %k0
+; AVX512-NEXT: kshiftrb $4, %k0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm0
; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512-NEXT: vpmovq2m %xmm0, %k0
@@ -779,7 +779,7 @@ define void @load_v8i1_broadcast_7_v1i1_
; AVX512-LABEL: load_v8i1_broadcast_7_v1i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
-; AVX512-NEXT: kshiftrw $7, %k0, %k0
+; AVX512-NEXT: kshiftrb $7, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: retq
;
@@ -800,7 +800,7 @@ define void @load_v8i1_broadcast_7_v2i1_
; AVX512-LABEL: load_v8i1_broadcast_7_v2i1_store:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
-; AVX512-NEXT: kshiftrw $6, %k0, %k0
+; AVX512-NEXT: kshiftrb $6, %k0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512-NEXT: vpmovq2m %xmm0, %k0
Modified: llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll?rev=321576&r1=321575&r2=321576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll Fri Dec 29 22:45:43 2017
@@ -136,7 +136,7 @@ define <4 x i1> @test9(<8 x i1> %a, <8 x
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT: vpmovw2m %xmm0, %k0
-; CHECK-NEXT: kshiftrw $4, %k0, %k0
+; CHECK-NEXT: kshiftrb $4, %k0, %k0
; CHECK-NEXT: vpmovm2d %k0, %xmm0
; CHECK-NEXT: retq
%res = shufflevector <8 x i1> %a, <8 x i1> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -148,7 +148,7 @@ define <2 x i1> @test10(<4 x i1> %a, <4
; CHECK: # %bb.0:
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0
-; CHECK-NEXT: kshiftrw $2, %k0, %k0
+; CHECK-NEXT: kshiftrb $2, %k0, %k0
; CHECK-NEXT: vpmovm2q %k0, %xmm0
; CHECK-NEXT: retq
%res = shufflevector <4 x i1> %a, <4 x i1> %b, <2 x i32> <i32 2, i32 3>
Modified: llvm/trunk/test/CodeGen/X86/pr33349.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr33349.ll?rev=321576&r1=321575&r2=321576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr33349.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr33349.ll Fri Dec 29 22:45:43 2017
@@ -40,7 +40,7 @@ target triple = "x86_64-unknown-linux-gn
; SKX: # %bb.0: # %bb
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
-; SKX-NEXT: kshiftrw $2, %k0, %k1
+; SKX-NEXT: kshiftrb $2, %k0, %k1
; SKX-NEXT: kshiftrw $1, %k1, %k2
; SKX-NEXT: kmovd %k2, %eax
; SKX-NEXT: testb $1, %al
More information about the llvm-commits
mailing list