[llvm] r346115 - [X86] Custom type legalize v2i8/v2i16/v2i32 mul to use pmuludq.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 4 21:02:12 PST 2018
Author: ctopper
Date: Sun Nov 4 21:02:12 2018
New Revision: 346115
URL: http://llvm.org/viewvc/llvm-project?rev=346115&view=rev
Log:
[X86] Custom type legalize v2i8/v2i16/v2i32 mul to use pmuludq.
v2i8/v2i16/v2i32 are promoted to v2i64. pmuludq takes a v2i64 input and produces a v2i64 output. Since we don't care about the upper bits of the type-legalized multiply, we can use the pmuludq to produce the multiply result for the bits we do care about.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/mmx-arith.ll
llvm/trunk/test/CodeGen/X86/mulvi32.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=346115&r1=346114&r2=346115&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Nov 4 21:02:12 2018
@@ -791,6 +791,10 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
setOperationAction(ISD::UREM, MVT::v2i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i32, Custom);
+
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
@@ -25911,6 +25915,24 @@ void X86TargetLowering::ReplaceNodeResul
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::MUL: {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && VT.getVectorNumElements() == 2 && "Unexpected VT");
+ if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) {
+ // Promote to a pattern that will be turned into PMULUDQ.
+ SDValue N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
+ N->getOperand(0));
+ N0 = DAG.getNode(ISD::AND, dl, MVT::v2i64, N0,
+ DAG.getConstant(0xffffffff, dl, MVT::v2i64));
+ SDValue N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
+ N->getOperand(1));
+ N1 = DAG.getNode(ISD::AND, dl, MVT::v2i64, N1,
+ DAG.getConstant(0xffffffff, dl, MVT::v2i64));
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v2i64, N0, N1);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, VT, Mul));
+ }
+ return;
+ }
case X86ISD::ADDUS:
case X86ISD::SUBUS:
case X86ISD::AVG: {
@@ -34422,6 +34444,26 @@ static SDValue combineMul(SDNode *N, Sel
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
+ // Look for multiply of 2 identical shuffles with a zero vector. Shuffle the
+ // result and insert the zero there instead. This can occur due to
+ // type legalization of v2i32 multiply to a PMULUDQ pattern.
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (!DCI.isBeforeLegalize() && isa<ShuffleVectorSDNode>(LHS) &&
+ isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
+ LHS.getOperand(1) == RHS.getOperand(1) &&
+ ISD::isBuildVectorAllZeros(LHS.getOperand(1).getNode())) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
+ if (SVN0->getMask().equals(SVN1->getMask())) {
+ SDLoc dl(N);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, LHS.getOperand(0),
+ RHS.getOperand(0));
+ return DAG.getVectorShuffle(VT, dl, Mul, DAG.getConstant(0, dl, VT),
+ SVN0->getMask());
+ }
+ }
+
if (SDValue V = combineMulToPMADDWD(N, DAG, Subtarget))
return V;
Modified: llvm/trunk/test/CodeGen/X86/mmx-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-arith.ll?rev=346115&r1=346114&r2=346115&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-arith.ll Sun Nov 4 21:02:12 2018
@@ -213,29 +213,24 @@ define void @test1(x86_mmx* %A, x86_mmx*
; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; X32-NEXT: movdqa %xmm1, %xmm2
-; X32-NEXT: pmuludq %xmm0, %xmm2
-; X32-NEXT: psrlq $32, %xmm1
-; X32-NEXT: pmuludq %xmm0, %xmm1
-; X32-NEXT: psllq $32, %xmm1
-; X32-NEXT: paddq %xmm2, %xmm1
+; X32-NEXT: pmuludq %xmm1, %xmm0
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; X32-NEXT: movq %xmm1, (%eax)
+; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
+; X32-NEXT: andps %xmm0, %xmm1
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; X32-NEXT: andps %xmm1, %xmm0
+; X32-NEXT: orps %xmm1, %xmm0
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; X32-NEXT: movq %xmm1, (%eax)
; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
-; X32-NEXT: orps %xmm0, %xmm1
+; X32-NEXT: xorps %xmm0, %xmm1
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; X32-NEXT: movq %xmm0, (%eax)
-; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; X32-NEXT: xorps %xmm1, %xmm0
-; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: emms
; X32-NEXT: retl
;
@@ -250,29 +245,24 @@ define void @test1(x86_mmx* %A, x86_mmx*
; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; X64-NEXT: movdqa %xmm1, %xmm2
-; X64-NEXT: pmuludq %xmm0, %xmm2
-; X64-NEXT: psrlq $32, %xmm1
-; X64-NEXT: pmuludq %xmm0, %xmm1
-; X64-NEXT: psllq $32, %xmm1
-; X64-NEXT: paddq %xmm2, %xmm1
+; X64-NEXT: pmuludq %xmm1, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; X64-NEXT: movq %xmm1, (%rdi)
+; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
+; X64-NEXT: pand %xmm0, %xmm1
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; X64-NEXT: pand %xmm1, %xmm0
+; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; X64-NEXT: movq %xmm1, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
-; X64-NEXT: por %xmm0, %xmm1
+; X64-NEXT: pxor %xmm0, %xmm1
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; X64-NEXT: movq %xmm0, (%rdi)
-; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; X64-NEXT: pxor %xmm1, %xmm0
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: emms
; X64-NEXT: retq
entry:
Modified: llvm/trunk/test/CodeGen/X86/mulvi32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mulvi32.ll?rev=346115&r1=346114&r2=346115&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mulvi32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mulvi32.ll Sun Nov 4 21:02:12 2018
@@ -9,28 +9,12 @@
define <2 x i32> @_mul2xi32a(<2 x i32>, <2 x i32>) {
; SSE-LABEL: _mul2xi32a:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psrlq $32, %xmm2
-; SSE-NEXT: pmuludq %xmm1, %xmm2
-; SSE-NEXT: movdqa %xmm1, %xmm3
-; SSE-NEXT: psrlq $32, %xmm3
-; SSE-NEXT: pmuludq %xmm0, %xmm3
-; SSE-NEXT: paddq %xmm2, %xmm3
-; SSE-NEXT: psllq $32, %xmm3
; SSE-NEXT: pmuludq %xmm1, %xmm0
-; SSE-NEXT: paddq %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _mul2xi32a:
; AVX: # %bb.0:
-; AVX-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX-NEXT: vpsrlq $32, %xmm1, %xmm3
-; AVX-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
-; AVX-NEXT: vpaddq %xmm2, %xmm3, %xmm2
-; AVX-NEXT: vpsllq $32, %xmm2, %xmm2
; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
%r = mul <2 x i32> %0, %1
ret <2 x i32> %r
More information about the llvm-commits
mailing list