[llvm] r315161 - [X86] If we see an insert of a bitcast into zero vector, canonicalize it to move the bitcast to the other side of the insert.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 7 18:33:41 PDT 2017
Author: ctopper
Date: Sat Oct 7 18:33:41 2017
New Revision: 315161
URL: http://llvm.org/viewvc/llvm-project?rev=315161&view=rev
Log:
[X86] If we see an insert of a bitcast into a zero vector, canonicalize it by moving the bitcast to the other side of the insert.
This improves detection of zeroing of the upper bits during isel.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td
llvm/trunk/test/CodeGen/X86/sad.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=315161&r1=315160&r2=315161&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Oct 7 18:33:41 2017
@@ -36107,6 +36107,20 @@ static SDValue combineInsertSubvector(SD
SubVec.getOperand(1),
DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
}
+
+ // If we're inserting a bitcast into zeros, rewrite the insert and move the
+ // bitcast to the other side. This helps with detecting zero extending
+ // during isel.
+ // TODO: Is this useful for other indices than 0?
+ if (SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) {
+ MVT CastVT = SubVec.getOperand(0).getSimpleValueType();
+ unsigned NumElems = OpVT.getSizeInBits() / CastVT.getScalarSizeInBits();
+ MVT NewVT = MVT::getVectorVT(CastVT.getVectorElementType(), NumElems);
+ SDValue Insert = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT,
+ DAG.getBitcast(NewVT, Vec),
+ SubVec.getOperand(0), N->getOperand(2));
+ return DAG.getBitcast(OpVT, Insert);
+ }
}
// If this is an insert of an extract, combine to a shuffle. Don't do this
Modified: llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td?rev=315161&r1=315160&r2=315161&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td Sat Oct 7 18:33:41 2017
@@ -368,7 +368,8 @@ let Predicates = [HasAVX512, NoVLX] in {
// where we explicitly insert zeros.
class veczeroupper<ValueType vt, RegisterClass RC> :
PatLeaf<(vt RC:$src), [{
- return N->getOpcode() == X86ISD::VPMADDWD;
+ return N->getOpcode() == X86ISD::VPMADDWD ||
+ N->getOpcode() == X86ISD::PSADBW;
}]>;
def zeroupperv2f64 : veczeroupper<v2f64, VR128>;
Modified: llvm/trunk/test/CodeGen/X86/sad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sad.ll?rev=315161&r1=315160&r2=315161&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sad.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sad.ll Sat Oct 7 18:33:41 2017
@@ -43,7 +43,6 @@ define i32 @sad_16i8() nounwind {
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
; AVX2-NEXT: vmovdqu a+1024(%rax), %xmm2
; AVX2-NEXT: vpsadbw b+1024(%rax), %xmm2, %xmm2
-; AVX2-NEXT: vmovdqa %xmm2, %xmm2
; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: addq $4, %rax
; AVX2-NEXT: jne .LBB0_1
@@ -67,7 +66,6 @@ define i32 @sad_16i8() nounwind {
; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512F-NEXT: vmovdqu a+1024(%rax), %xmm1
; AVX512F-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1
-; AVX512F-NEXT: vmovdqa %xmm1, %xmm1
; AVX512F-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: addq $4, %rax
; AVX512F-NEXT: jne .LBB0_1
@@ -93,7 +91,6 @@ define i32 @sad_16i8() nounwind {
; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512BW-NEXT: vmovdqu a+1024(%rax), %xmm1
; AVX512BW-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1
-; AVX512BW-NEXT: vmovdqa %xmm1, %xmm1
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: addq $4, %rax
; AVX512BW-NEXT: jne .LBB0_1
@@ -315,7 +312,6 @@ define i32 @sad_32i8() nounwind {
; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512F-NEXT: vmovdqa a+1024(%rax), %ymm2
; AVX512F-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2
-; AVX512F-NEXT: vmovdqa %ymm2, %ymm2
; AVX512F-NEXT: vpaddd %zmm1, %zmm2, %zmm1
; AVX512F-NEXT: addq $4, %rax
; AVX512F-NEXT: jne .LBB1_1
@@ -343,7 +339,6 @@ define i32 @sad_32i8() nounwind {
; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512BW-NEXT: vmovdqa a+1024(%rax), %ymm2
; AVX512BW-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2
-; AVX512BW-NEXT: vmovdqa %ymm2, %ymm2
; AVX512BW-NEXT: vpaddd %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: addq $4, %rax
; AVX512BW-NEXT: jne .LBB1_1
More information about the llvm-commits mailing list