[llvm] r324782 - [X86] Teach lower1BitVectorShuffle to recognize shuffles that are just filling upper elements with zero. Replace with insert_subvector.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 9 15:32:27 PST 2018
Author: ctopper
Date: Fri Feb 9 15:32:27 2018
New Revision: 324782
URL: http://llvm.org/viewvc/llvm-project?rev=324782&view=rev
Log:
[X86] Teach lower1BitVectorShuffle to recognize shuffles that are just filling upper elements with zero. Replace with insert_subvector.
There are still some extra kshifts in one of the modified test cases here, but hopefully that's only a DAG combine away.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324782&r1=324781&r2=324782&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Feb 9 15:32:27 2018
@@ -14414,8 +14414,36 @@ static SDValue lower512BitVectorShuffle(
// vector, shuffle and then truncate it back.
static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
+ unsigned NumElts = Mask.size();
+
+ // Try to recognize shuffles that are just padding a subvector with zeros.
+ unsigned SubvecElts = 0;
+ for (int i = 0; i != (int)NumElts; ++i) {
+ if (Mask[i] >= 0 && Mask[i] != i)
+ break;
+
+ ++SubvecElts;
+ }
+ assert(SubvecElts != NumElts && "Identity shuffle?");
+
+ // Clip to a power 2.
+ SubvecElts = PowerOf2Floor(SubvecElts);
+
+ // Make sure the number of zeroable bits in the top at least covers the bits
+ // not covered by the subvector.
+ if (Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) {
+ MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts);
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT,
+ V1, DAG.getIntPtrConstant(0, DL));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ getZeroVector(VT, Subtarget, DAG, DL),
+ Extract, DAG.getIntPtrConstant(0, DL));
+ }
+
+
assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/o basic ISA!");
MVT ExtVT;
@@ -14624,7 +14652,8 @@ static SDValue lowerVectorShuffle(SDValu
DAG);
if (Is1BitVector)
- return lower1BitVectorShuffle(DL, Mask, VT, V1, V2, Subtarget, DAG);
+ return lower1BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
+ DAG);
llvm_unreachable("Unimplemented!");
}
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=324782&r1=324781&r2=324782&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Fri Feb 9 15:32:27 2018
@@ -2757,19 +2757,16 @@ define <8 x i64> @mask_widening(<2 x i64
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftlw $12, %k0, %k0
-; KNL-NEXT: kshiftrw $12, %k0, %k1
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT: vmovdqa %ymm0, %ymm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; KNL-NEXT: kshiftrw $12, %k0, %k0
+; KNL-NEXT: kshiftlw $8, %k0, %k0
+; KNL-NEXT: kshiftrw $8, %k0, %k1
; KNL-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: mask_widening:
; SKX: ## %bb.0: ## %entry
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
-; SKX-NEXT: vpmovm2d %k0, %zmm0
-; SKX-NEXT: vmovdqa %ymm0, %ymm0
-; SKX-NEXT: vpmovd2m %zmm0, %k1
+; SKX-NEXT: kmovb %k0, %k1
; SKX-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; SKX-NEXT: retq
;
@@ -2779,10 +2776,9 @@ define <8 x i64> @mask_widening(<2 x i64
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
-; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0
-; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512BW-NEXT: kshiftrw $12, %k0, %k0
+; AVX512BW-NEXT: kshiftlw $8, %k0, %k0
+; AVX512BW-NEXT: kshiftrw $8, %k0, %k1
; AVX512BW-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
@@ -2793,9 +2789,7 @@ define <8 x i64> @mask_widening(<2 x i64
; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kshiftlb $4, %k0, %k0
; AVX512DQ-NEXT: kshiftrb $4, %k0, %k0
-; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
-; AVX512DQ-NEXT: vmovdqa %ymm0, %ymm0
-; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
+; AVX512DQ-NEXT: kmovb %k0, %k1
; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
entry:
More information about the llvm-commits mailing list