[llvm] r352783 - [Intrinsic] Expand SMULFIX to MUL, MULH[US], or [US]MUL_LOHI on vector arguments
Leonard Chan via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 31 11:15:37 PST 2019
Author: leonardchan
Date: Thu Jan 31 11:15:37 2019
New Revision: 352783
URL: http://llvm.org/viewvc/llvm-project?rev=352783&view=rev
Log:
[Intrinsic] Expand SMULFIX to MUL, MULH[US], or [US]MUL_LOHI on vector arguments
For zero scale SMULFIX, expand into MUL which produces better code for X86.
For vector arguments, expand into MUL if SMULFIX is provided with a zero scale.
Otherwise, expand into MULH[US] or [US]MUL_LOHI.
Differential Revision: https://reviews.llvm.org/D56987
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/test/CodeGen/X86/smul_fix.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp?rev=352783&r1=352782&r2=352783&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp Thu Jan 31 11:15:37 2019
@@ -141,6 +141,7 @@ class VectorLegalizer {
SDValue ExpandROT(SDValue Op);
SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
SDValue ExpandAddSubSat(SDValue Op);
+ SDValue ExpandFixedPointMul(SDValue Op);
SDValue ExpandStrictFPOp(SDValue Op);
/// Implements vector promotion.
@@ -782,6 +783,8 @@ SDValue VectorLegalizer::Expand(SDValue
case ISD::UADDSAT:
case ISD::SADDSAT:
return ExpandAddSubSat(Op);
+ case ISD::SMULFIX:
+ return ExpandFixedPointMul(Op);
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
@@ -1216,6 +1219,12 @@ SDValue VectorLegalizer::ExpandAddSubSat
return Expanded;
return DAG.UnrollVectorOp(Op.getNode());
}
+
+SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) {
+ if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG))
+ return Expanded;
+ return DAG.UnrollVectorOp(Op.getNode());
+}
SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
EVT VT = Op.getValueType();
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=352783&r1=352782&r2=352783&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Thu Jan 31 11:15:37 2019
@@ -5362,29 +5362,25 @@ SDValue TargetLowering::expandAddSubSat(
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
assert(Node->getOpcode() == ISD::SMULFIX && "Expected opcode to be SMULFIX.");
- assert(Node->getNumOperands() == 3 &&
- "Expected signed fixed point multiplication to have 3 operands.");
SDLoc dl(Node);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
- assert(LHS.getValueType().isScalarInteger() &&
- "Expected operands to be integers. Vector of int arguments should "
- "already be unrolled.");
- assert(RHS.getValueType().isScalarInteger() &&
- "Expected operands to be integers. Vector of int arguments should "
- "already be unrolled.");
+ EVT VT = LHS.getValueType();
+ unsigned Scale = Node->getConstantOperandVal(2);
+
+ // [us]mul.fix(a, b, 0) -> mul(a, b)
+ if (!Scale) {
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::MUL, VT))
+ return SDValue();
+ return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ }
+
assert(LHS.getValueType() == RHS.getValueType() &&
"Expected both operands to be the same type");
-
- unsigned Scale = Node->getConstantOperandVal(2);
- EVT VT = LHS.getValueType();
assert(Scale < VT.getScalarSizeInBits() &&
"Expected scale to be less than the number of bits.");
- if (!Scale)
- return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
-
// Get the upper and lower bits of the result.
SDValue Lo, Hi;
if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
@@ -5395,6 +5391,8 @@ TargetLowering::expandFixedPointMul(SDNo
} else if (isOperationLegalOrCustom(ISD::MULHS, VT)) {
Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
Hi = DAG.getNode(ISD::MULHS, dl, VT, LHS, RHS);
+ } else if (VT.isVector()) {
+ return SDValue();
} else {
report_fatal_error("Unable to expand signed fixed point multiplication.");
}
Modified: llvm/trunk/test/CodeGen/X86/smul_fix.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/smul_fix.ll?rev=352783&r1=352782&r2=352783&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/smul_fix.ll (original)
+++ llvm/trunk/test/CodeGen/X86/smul_fix.ll Thu Jan 31 11:15:37 2019
@@ -135,52 +135,27 @@ define i4 @func3(i4 %x, i4 %y) nounwind
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec:
; X64: # %bb.0:
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
-; X64-NEXT: movd %xmm2, %eax
-; X64-NEXT: cltq
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; X64-NEXT: movd %xmm2, %ecx
-; X64-NEXT: movslq %ecx, %rcx
-; X64-NEXT: imulq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: shrq $32, %rax
-; X64-NEXT: shldl $30, %ecx, %eax
-; X64-NEXT: movd %eax, %xmm2
-; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; X64-NEXT: movd %xmm3, %eax
-; X64-NEXT: cltq
-; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
-; X64-NEXT: movd %xmm3, %ecx
-; X64-NEXT: movslq %ecx, %rcx
-; X64-NEXT: imulq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: shrq $32, %rax
-; X64-NEXT: shldl $30, %ecx, %eax
-; X64-NEXT: movd %eax, %xmm3
-; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; X64-NEXT: movd %xmm1, %eax
-; X64-NEXT: cltq
-; X64-NEXT: movd %xmm0, %ecx
-; X64-NEXT: movslq %ecx, %rcx
-; X64-NEXT: imulq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: shrq $32, %rax
-; X64-NEXT: shldl $30, %ecx, %eax
-; X64-NEXT: movd %eax, %xmm2
-; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; X64-NEXT: movd %xmm1, %eax
-; X64-NEXT: cltq
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X64-NEXT: movd %xmm0, %ecx
-; X64-NEXT: movslq %ecx, %rcx
-; X64-NEXT: imulq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: shrq $32, %rax
-; X64-NEXT: shldl $30, %ecx, %eax
-; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X64-NEXT: movdqa %xmm2, %xmm0
+; X64-NEXT: pxor %xmm2, %xmm2
+; X64-NEXT: pxor %xmm3, %xmm3
+; X64-NEXT: pcmpgtd %xmm1, %xmm3
+; X64-NEXT: pand %xmm0, %xmm3
+; X64-NEXT: pcmpgtd %xmm0, %xmm2
+; X64-NEXT: pand %xmm1, %xmm2
+; X64-NEXT: paddd %xmm3, %xmm2
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; X64-NEXT: pmuludq %xmm1, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X64-NEXT: pmuludq %xmm3, %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
+; X64-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; X64-NEXT: psubd %xmm2, %xmm4
+; X64-NEXT: pslld $30, %xmm4
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: psrld $2, %xmm0
+; X64-NEXT: por %xmm4, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: vec:
@@ -295,32 +270,13 @@ define i4 @func6(i4 %x, i4 %y) nounwind
define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec2:
; X64: # %bb.0:
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
-; X64-NEXT: movd %xmm2, %eax
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; X64-NEXT: movd %xmm2, %ecx
-; X64-NEXT: imull %eax, %ecx
-; X64-NEXT: movd %ecx, %xmm2
-; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; X64-NEXT: movd %xmm3, %eax
-; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
-; X64-NEXT: movd %xmm3, %ecx
-; X64-NEXT: imull %eax, %ecx
-; X64-NEXT: movd %ecx, %xmm3
-; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; X64-NEXT: movd %xmm1, %eax
-; X64-NEXT: movd %xmm0, %ecx
-; X64-NEXT: imull %eax, %ecx
-; X64-NEXT: movd %ecx, %xmm2
-; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; X64-NEXT: movd %xmm1, %eax
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X64-NEXT: movd %xmm0, %ecx
-; X64-NEXT: imull %eax, %ecx
-; X64-NEXT: movd %ecx, %xmm0
-; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X64-NEXT: movdqa %xmm2, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X64-NEXT: pmuludq %xmm1, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X64-NEXT: pmuludq %xmm2, %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
;
; X86-LABEL: vec2:
More information about the llvm-commits mailing list