[llvm] r347287 - [SelectionDAG] Compute known bits and num sign bits for live out vector registers. Use it to add AssertZExt/AssertSExt in the live in basic blocks
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 19 20:30:27 PST 2018
Author: ctopper
Date: Mon Nov 19 20:30:26 2018
New Revision: 347287
URL: http://llvm.org/viewvc/llvm-project?rev=347287&view=rev
Log:
[SelectionDAG] Compute known bits and num sign bits for live out vector registers. Use it to add AssertZExt/AssertSExt in the live in basic blocks
Summary:
We already support this for scalars, but it was explicitly disabled for vectors. In the updated test cases this allows us to see that the upper bits are zero, so we can use fewer multiply instructions to emulate a 64 bit multiply.
This should help with the ispc issue that a coworker pointed me to: https://github.com/ispc/ispc/issues/1362
Reviewers: spatel, efriedma, RKSimon, arsenm
Reviewed By: spatel
Subscribers: wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D54725
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
llvm/trunk/test/CodeGen/X86/vector-mul.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=347287&r1=347286&r2=347287&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Mon Nov 19 20:30:26 2018
@@ -823,7 +823,7 @@ SDValue RegsForValue::getCopyFromRegs(Se
// If the source register was virtual and if we know something about it,
// add an assert node.
if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
- !RegisterVT.isInteger() || RegisterVT.isVector())
+ !RegisterVT.isInteger())
continue;
const FunctionLoweringInfo::LiveOutInfo *LOI =
@@ -831,7 +831,7 @@ SDValue RegsForValue::getCopyFromRegs(Se
if (!LOI)
continue;
- unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned RegSize = RegisterVT.getScalarSizeInBits();
unsigned NumSignBits = LOI->NumSignBits;
unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=347287&r1=347286&r2=347287&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Mon Nov 19 20:30:26 2018
@@ -697,10 +697,10 @@ void SelectionDAGISel::ComputeLiveOutVRe
if (!TargetRegisterInfo::isVirtualRegister(DestReg))
continue;
- // Ignore non-scalar or non-integer values.
+ // Ignore non-integer values.
SDValue Src = N->getOperand(2);
EVT SrcVT = Src.getValueType();
- if (!SrcVT.isInteger() || SrcVT.isVector())
+ if (!SrcVT.isInteger())
continue;
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
Modified: llvm/trunk/test/CodeGen/X86/vector-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-mul.ll?rev=347287&r1=347286&r2=347287&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-mul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-mul.ll Mon Nov 19 20:30:26 2018
@@ -1253,35 +1253,21 @@ define <2 x i64> @mul_v2i64_zext_cross_b
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; X86-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: psrlq $32, %xmm0
; X86-NEXT: pmuludq %xmm1, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_zext_cross_bb:
; X64: # %bb.0:
; X64-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; X64-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: pmuludq %xmm1, %xmm2
-; X64-NEXT: psrlq $32, %xmm0
; X64-NEXT: pmuludq %xmm1, %xmm0
-; X64-NEXT: psllq $32, %xmm0
-; X64-NEXT: paddq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-AVX-LABEL: mul_v2i64_zext_cross_bb:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; X64-AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
-; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
-; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0
-; X64-AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX-NEXT: retq
%a = load <2 x i32>, <2 x i32>* %in
%b = zext <2 x i32> %a to <2 x i64>
@@ -1302,19 +1288,9 @@ define <4 x i64> @mul_v4i64_zext_cross_b
; X86-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; X86-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; X86-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
-; X86-NEXT: pmovzxdq {{.*#+}} xmm3 = mem[0],zero,mem[1],zero
-; X86-NEXT: movdqa %xmm0, %xmm4
-; X86-NEXT: pmuludq %xmm3, %xmm4
-; X86-NEXT: psrlq $32, %xmm0
-; X86-NEXT: pmuludq %xmm3, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm4, %xmm0
-; X86-NEXT: movdqa %xmm1, %xmm3
-; X86-NEXT: pmuludq %xmm2, %xmm3
-; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: pmuludq %xmm2, %xmm1
-; X86-NEXT: psllq $32, %xmm1
-; X86-NEXT: paddq %xmm3, %xmm1
+; X86-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
+; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v4i64_zext_cross_bb:
@@ -1322,19 +1298,9 @@ define <4 x i64> @mul_v4i64_zext_cross_b
; X64-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; X64-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; X64-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
-; X64-NEXT: pmovzxdq {{.*#+}} xmm3 = mem[0],zero,mem[1],zero
-; X64-NEXT: movdqa %xmm0, %xmm4
-; X64-NEXT: pmuludq %xmm3, %xmm4
-; X64-NEXT: psrlq $32, %xmm0
-; X64-NEXT: pmuludq %xmm3, %xmm0
-; X64-NEXT: psllq $32, %xmm0
-; X64-NEXT: paddq %xmm4, %xmm0
-; X64-NEXT: movdqa %xmm1, %xmm3
-; X64-NEXT: pmuludq %xmm2, %xmm3
-; X64-NEXT: psrlq $32, %xmm1
; X64-NEXT: pmuludq %xmm2, %xmm1
-; X64-NEXT: psllq $32, %xmm1
-; X64-NEXT: paddq %xmm3, %xmm1
+; X64-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
+; X64-NEXT: pmuludq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v4i64_zext_cross_bb:
@@ -1345,16 +1311,8 @@ define <4 x i64> @mul_v4i64_zext_cross_b
; X64-XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; X64-XOP-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
; X64-XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X64-XOP-NEXT: vpmuludq %xmm2, %xmm3, %xmm4
-; X64-XOP-NEXT: vpsrlq $32, %xmm3, %xmm3
; X64-XOP-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
-; X64-XOP-NEXT: vpsllq $32, %xmm2, %xmm2
-; X64-XOP-NEXT: vpaddq %xmm2, %xmm4, %xmm2
-; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm3
-; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; X64-XOP-NEXT: vpsllq $32, %xmm0, %xmm0
-; X64-XOP-NEXT: vpaddq %xmm0, %xmm3, %xmm0
; X64-XOP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-XOP-NEXT: retq
;
@@ -1362,11 +1320,7 @@ define <4 x i64> @mul_v4i64_zext_cross_b
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X64-AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm2
-; X64-AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
; X64-AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; X64-AVX2-NEXT: retq
%a = load <4 x i32>, <4 x i32>* %in
%b = zext <4 x i32> %a to <4 x i64>
More information about the llvm-commits
mailing list