[llvm] r324791 - [X86] Teach combineInsertSubvector how to combine some k-register insert_subvectors and extract_subvector sequences to remove extra zeroing.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 9 17:00:41 PST 2018
Author: ctopper
Date: Fri Feb 9 17:00:41 2018
New Revision: 324791
URL: http://llvm.org/viewvc/llvm-project?rev=324791&view=rev
Log:
[X86] Teach combineInsertSubvector how to combine some k-register insert_subvectors and extract_subvector sequences to remove extra zeroing.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324791&r1=324790&r2=324791&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Feb 9 17:00:41 2018
@@ -37660,9 +37660,7 @@ static SDValue combineInsertSubvector(SD
MVT OpVT = N->getSimpleValueType(0);
- // Early out for mask vectors.
- if (OpVT.getVectorElementType() == MVT::i1)
- return SDValue();
+ bool IsI1Vector = OpVT.getVectorElementType() == MVT::i1;
SDLoc dl(N);
SDValue Vec = N->getOperand(0);
@@ -37674,23 +37672,40 @@ static SDValue combineInsertSubvector(SD
if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
// Inserting zeros into zeros is a nop.
if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
- return Vec;
+ return getZeroVector(OpVT, Subtarget, DAG, dl);
// If we're inserting into a zero vector and then into a larger zero vector,
// just insert into the larger zero vector directly.
if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR &&
ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) {
unsigned Idx2Val = SubVec.getConstantOperandVal(2);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec,
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
+ getZeroVector(OpVT, Subtarget, DAG, dl),
SubVec.getOperand(1),
DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
}
+ // If we're inserting into a zero vector and our input was extracted from an
+ // insert into a zero vector of the same type and the extraction was at
+ // least as large as the original insertion, just insert the original
+ // subvector into a zero vector.
+ if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 &&
+ SubVec.getConstantOperandVal(1) == 0 &&
+ SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) {
+ SDValue Ins = SubVec.getOperand(0);
+ if (Ins.getConstantOperandVal(2) == 0 &&
+ ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) &&
+ Ins.getOperand(1).getValueSizeInBits() <= SubVecVT.getSizeInBits())
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
+ getZeroVector(OpVT, Subtarget, DAG, dl),
+ Ins.getOperand(1), N->getOperand(2));
+ }
+
// If we're inserting a bitcast into zeros, rewrite the insert and move the
// bitcast to the other side. This helps with detecting zero extending
// during isel.
// TODO: Is this useful for other indices than 0?
- if (SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) {
+ if (!IsI1Vector && SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) {
MVT CastVT = SubVec.getOperand(0).getSimpleValueType();
unsigned NumElems = OpVT.getSizeInBits() / CastVT.getScalarSizeInBits();
MVT NewVT = MVT::getVectorVT(CastVT.getVectorElementType(), NumElems);
@@ -37701,6 +37716,10 @@ static SDValue combineInsertSubvector(SD
}
}
+ // Stop here if this is an i1 vector.
+ if (IsI1Vector)
+ return SDValue();
+
// If this is an insert of an extract, combine to a shuffle. Don't do this
// if the insert or extract can be represented with a subregister operation.
if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=324791&r1=324790&r2=324791&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Fri Feb 9 17:00:41 2018
@@ -2757,16 +2757,13 @@ define <8 x i64> @mask_widening(<2 x i64
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftlw $12, %k0, %k0
-; KNL-NEXT: kshiftrw $12, %k0, %k0
-; KNL-NEXT: kshiftlw $8, %k0, %k0
-; KNL-NEXT: kshiftrw $8, %k0, %k1
+; KNL-NEXT: kshiftrw $12, %k0, %k1
; KNL-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: mask_widening:
; SKX: ## %bb.0: ## %entry
-; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
-; SKX-NEXT: kmovb %k0, %k1
+; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; SKX-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; SKX-NEXT: retq
;
@@ -2776,9 +2773,7 @@ define <8 x i64> @mask_widening(<2 x i64
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $12, %k0, %k0
-; AVX512BW-NEXT: kshiftlw $8, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $8, %k0, %k1
+; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
; AVX512BW-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
@@ -2787,9 +2782,8 @@ define <8 x i64> @mask_widening(<2 x i64
; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512DQ-NEXT: kshiftlb $4, %k0, %k0
-; AVX512DQ-NEXT: kshiftrb $4, %k0, %k0
-; AVX512DQ-NEXT: kmovb %k0, %k1
+; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0
+; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1
; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
entry:
More information about the llvm-commits
mailing list