[llvm] r353059 - [Intrinsic] Unsigned Fixed Point Multiplication Intrinsic
Leonard Chan via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 4 09:18:11 PST 2019
Author: leonardchan
Date: Mon Feb 4 09:18:11 2019
New Revision: 353059
URL: http://llvm.org/viewvc/llvm-project?rev=353059&view=rev
Log:
[Intrinsic] Unsigned Fixed Point Multiplication Intrinsic
Add an intrinsic that takes 2 unsigned integers with the scale of them
provided as the third argument and performs fixed point multiplication on
them.
This is a part of implementing fixed point arithmetic in clang where some of
the more complex operations will be implemented as intrinsics.
Differential Revision: https://reviews.llvm.org/D55625
Added:
llvm/trunk/test/CodeGen/X86/umul_fix.ll
Modified:
llvm/trunk/docs/LangRef.rst
llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
llvm/trunk/include/llvm/CodeGen/TargetLowering.h
llvm/trunk/include/llvm/IR/Intrinsics.td
llvm/trunk/include/llvm/Target/TargetSelectionDAG.td
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
llvm/trunk/lib/IR/Verifier.cpp
Modified: llvm/trunk/docs/LangRef.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/LangRef.rst?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/docs/LangRef.rst (original)
+++ llvm/trunk/docs/LangRef.rst Mon Feb 4 09:18:11 2019
@@ -13089,7 +13089,8 @@ Arguments
""""""""""
The arguments (%a and %b) and the result may be of integer types of any bit
-width, but they must have the same bit width. ``%a`` and ``%b`` are the two
+width, but they must have the same bit width. The arguments may also be integer
+vectors of the same length and element size. ``%a`` and ``%b`` are the two
values that will undergo signed fixed point multiplication. The argument
``%scale`` represents the scale of both operands, and must be a constant
integer.
@@ -13105,7 +13106,7 @@ If the result value cannot be precisely
value is rounded up or down to the closest representable value. The rounding
direction is unspecified.
-It is undefined behavior if the source value does not fit within the range of
+It is undefined behavior if the result value does not fit within the range of
the fixed point type.
@@ -13122,6 +13123,65 @@ Examples
%res = call i4 @llvm.smul.fix.i4(i4 3, i4 -3, i32 1) ; %res = -5 (or -4) (1.5 x -1.5 = -2.25)
+'``llvm.umul.fix.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.umul.fix``
+on any integer bit width or vectors of integers.
+
+::
+
+ declare i16 @llvm.umul.fix.i16(i16 %a, i16 %b, i32 %scale)
+ declare i32 @llvm.umul.fix.i32(i32 %a, i32 %b, i32 %scale)
+ declare i64 @llvm.umul.fix.i64(i64 %a, i64 %b, i32 %scale)
+ declare <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
+
+Overview
+"""""""""
+
+The '``llvm.umul.fix``' family of intrinsic functions performs unsigned
+fixed point multiplication on 2 arguments of the same scale.
+
+Arguments
+""""""""""
+
+The arguments (%a and %b) and the result may be of integer types of any bit
+width, but they must have the same bit width. The arguments may also be integer
+vectors of the same length and element size. ``%a`` and ``%b`` are the two
+values that will undergo unsigned fixed point multiplication. The argument
+``%scale`` represents the scale of both operands, and must be a constant
+integer.
+
+Semantics:
+""""""""""
+
+This operation performs unsigned fixed point multiplication on the 2 arguments
+of a specified scale. The result is also returned in that scale, which is given
+by the third argument.
+
+If the result value cannot be precisely represented in the given scale, the
+value is rounded up or down to the closest representable value. The rounding
+direction is unspecified.
+
+It is undefined behavior if the result value does not fit within the range of
+the fixed point type.
+
+
+Examples
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call i4 @llvm.umul.fix.i4(i4 3, i4 2, i32 0) ; %res = 6 (3 x 2 = 6)
+ %res = call i4 @llvm.umul.fix.i4(i4 3, i4 2, i32 1) ; %res = 3 (1.5 x 1 = 1.5)
+
+ ; The result in the following could be rounded down to 3.5 or up to 4
+ %res = call i4 @llvm.umul.fix.i4(i4 15, i4 1, i32 1) ; %res = 7 (or 8) (7.5 x 0.5 = 3.75)
+
+
Specialised Arithmetic Intrinsics
---------------------------------
Modified: llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h Mon Feb 4 09:18:11 2019
@@ -271,12 +271,12 @@ namespace ISD {
/// resulting value is this minimum value.
SSUBSAT, USUBSAT,
- /// RESULT = SMULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on
+ /// RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on
/// 2 integers with the same width and scale. SCALE represents the scale of
/// both operands as fixed point numbers. This SCALE parameter must be a
/// constant integer. A scale of zero is effectively performing
/// multiplication on 2 integers.
- SMULFIX,
+ SMULFIX, UMULFIX,
/// Simple binary floating point operators.
FADD, FSUB, FMUL, FDIV, FREM,
Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Mon Feb 4 09:18:11 2019
@@ -836,6 +836,7 @@ public:
default:
llvm_unreachable("Unexpected fixed point operation.");
case ISD::SMULFIX:
+ case ISD::UMULFIX:
Supported = isSupportedFixedPointOperation(Op, VT, Scale);
break;
}
Modified: llvm/trunk/include/llvm/IR/Intrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/Intrinsics.td?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/Intrinsics.td (original)
+++ llvm/trunk/include/llvm/IR/Intrinsics.td Mon Feb 4 09:18:11 2019
@@ -850,6 +850,10 @@ def int_smul_fix : Intrinsic<[llvm_anyin
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
+def int_umul_fix : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
+
//===------------------------- Memory Use Markers -------------------------===//
//
def int_lifetime_start : Intrinsic<[],
Modified: llvm/trunk/include/llvm/Target/TargetSelectionDAG.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetSelectionDAG.td?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetSelectionDAG.td (original)
+++ llvm/trunk/include/llvm/Target/TargetSelectionDAG.td Mon Feb 4 09:18:11 2019
@@ -124,7 +124,7 @@ def SDTIntSatNoShOp : SDTypeProfile<1, 2
def SDTIntBinHiLoOp : SDTypeProfile<2, 2, [ // mulhi, mullo, sdivrem, udivrem
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,SDTCisInt<0>
]>;
-def SDTIntScaledBinOp : SDTypeProfile<1, 3, [ // smulfix
+def SDTIntScaledBinOp : SDTypeProfile<1, 3, [ // smulfix, umulfix
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;
@@ -389,7 +389,9 @@ def saddsat : SDNode<"ISD::SADDSAT"
def uaddsat : SDNode<"ISD::UADDSAT" , SDTIntBinOp, [SDNPCommutative]>;
def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>;
def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>;
+
def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
+def umulfix : SDNode<"ISD::UMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Mon Feb 4 09:18:11 2019
@@ -1127,7 +1127,8 @@ void SelectionDAGLegalize::LegalizeOp(SD
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
}
- case ISD::SMULFIX: {
+ case ISD::SMULFIX:
+ case ISD::UMULFIX: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -3290,6 +3291,7 @@ bool SelectionDAGLegalize::ExpandNode(SD
Results.push_back(TLI.expandAddSubSat(Node, DAG));
break;
case ISD::SMULFIX:
+ case ISD::UMULFIX:
Results.push_back(TLI.expandFixedPointMul(Node, DAG));
break;
case ISD::SADDO:
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp Mon Feb 4 09:18:11 2019
@@ -148,7 +148,8 @@ void DAGTypeLegalizer::PromoteIntegerRes
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
- case ISD::SMULFIX: Res = PromoteIntRes_SMULFIX(N); break;
+ case ISD::SMULFIX:
+ case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break;
case ISD::ATOMIC_LOAD:
Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
@@ -645,11 +646,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_
return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SMULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
// Can just promote the operands then continue with operation.
SDLoc dl(N);
- SDValue Op1Promoted = SExtPromotedInteger(N->getOperand(0));
- SDValue Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ SDValue Op1Promoted, Op2Promoted;
+ if (N->getOpcode() == ISD::SMULFIX) {
+ Op1Promoted = SExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ } else {
+ Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
+ }
EVT PromotedType = Op1Promoted.getValueType();
return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
N->getOperand(2));
@@ -1090,7 +1097,8 @@ bool DAGTypeLegalizer::PromoteIntegerOpe
case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
- case ISD::SMULFIX: Res = PromoteIntOp_SMULFIX(N); break;
+ case ISD::SMULFIX:
+ case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break;
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
}
@@ -1452,7 +1460,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_A
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_SMULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_MULFIX(SDNode *N) {
SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
return SDValue(
DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
@@ -1620,7 +1628,8 @@ void DAGTypeLegalizer::ExpandIntegerResu
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
- case ISD::SMULFIX: ExpandIntRes_SMULFIX(N, Lo, Hi); break;
+ case ISD::SMULFIX:
+ case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2588,8 +2597,12 @@ void DAGTypeLegalizer::ExpandIntRes_ADDS
SplitInteger(Result, Lo, Hi);
}
-void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(
+ (N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::UMULFIX) &&
+ "Expected operand to be signed or unsigned fixed point multiplication");
+
SDLoc dl(N);
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
@@ -2607,10 +2620,12 @@ void DAGTypeLegalizer::ExpandIntRes_SMUL
GetExpandedInteger(RHS, RL, RH);
SmallVector<SDValue, 4> Result;
- if (!TLI.expandMUL_LOHI(ISD::SMUL_LOHI, VT, dl, LHS, RHS, Result, NVT, DAG,
+ bool Signed = N->getOpcode() == ISD::SMULFIX;
+ unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
LL, LH, RL, RH)) {
- report_fatal_error("Unable to expand SMUL_FIX using SMUL_LOHI.");
+ report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI.");
return;
}
@@ -2671,9 +2686,16 @@ void DAGTypeLegalizer::ExpandIntRes_SMUL
Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
+ } else if (Scale == VTSize) {
+ assert(
+ !Signed &&
+ "Only unsigned types can have a scale equal to the operand bit width");
+
+ Lo = ResultHL;
+ Hi = ResultHH;
} else {
- llvm_unreachable(
- "Expected the scale to be less than the width of the operands");
+ llvm_unreachable("Expected the scale to be less than or equal to the width "
+ "of the operands");
}
}
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h Mon Feb 4 09:18:11 2019
@@ -344,7 +344,7 @@ private:
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
- SDValue PromoteIntRes_SMULFIX(SDNode *N);
+ SDValue PromoteIntRes_MULFIX(SDNode *N);
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
// Integer Operand Promotion.
@@ -378,7 +378,7 @@ private:
SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_SMULFIX(SDNode *N);
+ SDValue PromoteIntOp_MULFIX(SDNode *N);
SDValue PromoteIntOp_FPOWI(SDNode *N);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -435,7 +435,7 @@ private:
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_SMULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -692,7 +692,7 @@ private:
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
- SDValue ScalarizeVecRes_SMULFIX(SDNode *N);
+ SDValue ScalarizeVecRes_MULFIX(SDNode *N);
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -729,7 +729,7 @@ private:
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp Mon Feb 4 09:18:11 2019
@@ -425,7 +425,8 @@ SDValue VectorLegalizer::LegalizeOp(SDVa
case ISD::USUBSAT:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
- case ISD::SMULFIX: {
+ case ISD::SMULFIX:
+ case ISD::UMULFIX: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -784,6 +785,7 @@ SDValue VectorLegalizer::Expand(SDValue
case ISD::SADDSAT:
return ExpandAddSubSat(Op);
case ISD::SMULFIX:
+ case ISD::UMULFIX:
return ExpandFixedPointMul(Op);
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Mon Feb 4 09:18:11 2019
@@ -172,7 +172,8 @@ void DAGTypeLegalizer::ScalarizeVectorRe
R = ScalarizeVecRes_StrictFPOp(N);
break;
case ISD::SMULFIX:
- R = ScalarizeVecRes_SMULFIX(N);
+ case ISD::UMULFIX:
+ R = ScalarizeVecRes_MULFIX(N);
break;
}
@@ -196,7 +197,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRe
Op0.getValueType(), Op0, Op1, Op2);
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SMULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MULFIX(SDNode *N) {
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = N->getOperand(2);
@@ -859,7 +860,8 @@ void DAGTypeLegalizer::SplitVectorResult
SplitVecRes_StrictFPOp(N, Lo, Hi);
break;
case ISD::SMULFIX:
- SplitVecRes_SMULFIX(N, Lo, Hi);
+ case ISD::UMULFIX:
+ SplitVecRes_MULFIX(N, Lo, Hi);
break;
}
@@ -898,8 +900,7 @@ void DAGTypeLegalizer::SplitVecRes_Terna
Op0Hi, Op1Hi, Op2Hi);
}
-void DAGTypeLegalizer::SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Mon Feb 4 09:18:11 2019
@@ -5077,6 +5077,17 @@ SDDbgValue *SelectionDAGBuilder::getDbgV
# define setjmp_undefined_for_msvc
#endif
+static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
+ switch (Intrinsic) {
+ case Intrinsic::smul_fix:
+ return ISD::SMULFIX;
+ case Intrinsic::umul_fix:
+ return ISD::UMULFIX;
+ default:
+ llvm_unreachable("Unhandled fixed point intrinsic");
+ }
+}
+
/// Lower the call to the specified intrinsic function. If we want to emit this
/// as a call to a named external function, return the name. Otherwise, lower it
/// and return null.
@@ -5880,12 +5891,13 @@ SelectionDAGBuilder::visitIntrinsicCall(
setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
return nullptr;
}
- case Intrinsic::smul_fix: {
+ case Intrinsic::smul_fix:
+ case Intrinsic::umul_fix: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
- setValue(&I,
- DAG.getNode(ISD::SMULFIX, sdl, Op1.getValueType(), Op1, Op2, Op3));
+ setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
+ Op1.getValueType(), Op1, Op2, Op3));
return nullptr;
}
case Intrinsic::stacksave: {
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp Mon Feb 4 09:18:11 2019
@@ -300,6 +300,7 @@ std::string SDNode::getOperationName(con
case ISD::SSUBSAT: return "ssubsat";
case ISD::USUBSAT: return "usubsat";
case ISD::SMULFIX: return "smulfix";
+ case ISD::UMULFIX: return "umulfix";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Mon Feb 4 09:18:11 2019
@@ -5415,7 +5415,9 @@ SDValue TargetLowering::expandAddSubSat(
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
- assert(Node->getOpcode() == ISD::SMULFIX && "Expected opcode to be SMULFIX.");
+ assert((Node->getOpcode() == ISD::SMULFIX ||
+ Node->getOpcode() == ISD::UMULFIX) &&
+ "Expected opcode to be SMULFIX or UMULFIX.");
SDLoc dl(Node);
SDValue LHS = Node->getOperand(0);
@@ -5430,27 +5432,37 @@ TargetLowering::expandFixedPointMul(SDNo
return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
}
+ unsigned VTSize = VT.getScalarSizeInBits();
+ bool Signed = Node->getOpcode() == ISD::SMULFIX;
+
+ assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
+ "Expected scale to be less than the number of bits if signed or at "
+ "most the number of bits if unsigned.");
assert(LHS.getValueType() == RHS.getValueType() &&
"Expected both operands to be the same type");
- assert(Scale < VT.getScalarSizeInBits() &&
- "Expected scale to be less than the number of bits.");
// Get the upper and lower bits of the result.
SDValue Lo, Hi;
- if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
- SDValue Result =
- DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
+ if (isOperationLegalOrCustom(LoHiOp, VT)) {
+ SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
Lo = Result.getValue(0);
Hi = Result.getValue(1);
- } else if (isOperationLegalOrCustom(ISD::MULHS, VT)) {
+ } else if (isOperationLegalOrCustom(HiOp, VT)) {
Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
- Hi = DAG.getNode(ISD::MULHS, dl, VT, LHS, RHS);
+ Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
} else if (VT.isVector()) {
return SDValue();
} else {
- report_fatal_error("Unable to expand signed fixed point multiplication.");
+ report_fatal_error("Unable to expand fixed point multiplication.");
}
+ if (Scale == VTSize)
+ // Result is just the top half since we'd be shifting by the width of the
+ // operand.
+ return Hi;
+
// The result will need to be shifted right by the scale since both operands
// are scaled. The result is given to us in 2 halves, so we only want part of
// both in the result.
Modified: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp (original)
+++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp Mon Feb 4 09:18:11 2019
@@ -624,6 +624,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SSUBSAT, VT, Expand);
setOperationAction(ISD::USUBSAT, VT, Expand);
setOperationAction(ISD::SMULFIX, VT, Expand);
+ setOperationAction(ISD::UMULFIX, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
Modified: llvm/trunk/lib/IR/Verifier.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/Verifier.cpp?rev=353059&r1=353058&r2=353059&view=diff
==============================================================================
--- llvm/trunk/lib/IR/Verifier.cpp (original)
+++ llvm/trunk/lib/IR/Verifier.cpp Mon Feb 4 09:18:11 2019
@@ -4574,22 +4574,31 @@ void Verifier::visitIntrinsicCall(Intrin
"of ints");
break;
}
- case Intrinsic::smul_fix: {
+ case Intrinsic::smul_fix:
+ case Intrinsic::umul_fix: {
Value *Op1 = Call.getArgOperand(0);
Value *Op2 = Call.getArgOperand(1);
Assert(Op1->getType()->isIntOrIntVectorTy(),
- "first operand of smul_fix must be an int type or vector "
+ "first operand of [us]mul_fix must be an int type or vector "
"of ints");
Assert(Op2->getType()->isIntOrIntVectorTy(),
- "second operand of smul_fix must be an int type or vector "
+ "second operand of [us]mul_fix must be an int type or vector "
"of ints");
auto *Op3 = dyn_cast<ConstantInt>(Call.getArgOperand(2));
- Assert(Op3, "third argument of smul_fix must be a constant integer");
+ Assert(Op3, "third argument of [us]mul_fix must be a constant integer");
Assert(Op3->getType()->getBitWidth() <= 32,
- "third argument of smul_fix must fit within 32 bits");
- Assert(Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
- "the scale of smul_fix must be less than the width of the operands");
+ "third argument of [us]mul_fix must fit within 32 bits");
+
+ if (ID == Intrinsic::smul_fix) {
+ Assert(
+ Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
+ "the scale of smul_fix must be less than the width of the operands");
+ } else {
+ Assert(Op3->getZExtValue() <= Op1->getType()->getScalarSizeInBits(),
+ "the scale of umul_fix must be less than or equal to the width of "
+ "the operands");
+ }
break;
}
};
Added: llvm/trunk/test/CodeGen/X86/umul_fix.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/umul_fix.ll?rev=353059&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/umul_fix.ll (added)
+++ llvm/trunk/test/CodeGen/X86/umul_fix.ll Mon Feb 4 09:18:11 2019
@@ -0,0 +1,393 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
+
+declare i4 @llvm.umul.fix.i4 (i4, i4, i32)
+declare i32 @llvm.umul.fix.i32 (i32, i32, i32)
+declare i64 @llvm.umul.fix.i64 (i64, i64, i32)
+declare <4 x i32> @llvm.umul.fix.v4i32(<4 x i32>, <4 x i32>, i32)
+
+define i32 @func(i32 %x, i32 %y) nounwind {
+; X64-LABEL: func:
+; X64: # %bb.0:
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: shrq $32, %rax
+; X64-NEXT: shldl $30, %ecx, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+;
+; X86-LABEL: func:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: shrdl $2, %edx, %eax
+; X86-NEXT: retl
+ %tmp = call i32 @llvm.umul.fix.i32(i32 %x, i32 %y, i32 2);
+ ret i32 %tmp;
+}
+
+define i64 @func2(i64 %x, i64 %y) nounwind {
+; X64-LABEL: func2:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: shrdq $2, %rdx, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: func2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %ebx, %ebp
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: shldl $30, %eax, %edx
+; X86-NEXT: shldl $30, %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 2);
+ ret i64 %tmp;
+}
+
+define i4 @func3(i4 %x, i4 %y) nounwind {
+; X64-LABEL: func3:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: andl $15, %esi
+; X64-NEXT: andl $15, %eax
+; X64-NEXT: imull %esi, %eax
+; X64-NEXT: shrb $2, %al
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func3:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: andb $15, %al
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: andb $15, %cl
+; X86-NEXT: movzbl %cl, %ecx
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: shrb $2, %al
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+ %tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 2);
+ ret i4 %tmp;
+}
+
+define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X64-LABEL: vec:
+; X64: # %bb.0:
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X64-NEXT: pmuludq %xmm1, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X64-NEXT: pmuludq %xmm2, %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
+; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X64-NEXT: pslld $30, %xmm3
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: psrld $2, %xmm0
+; X64-NEXT: por %xmm3, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: vec:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: shldl $30, %eax, %ebp
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: shldl $30, %eax, %ebx
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: shldl $30, %eax, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: shldl $30, %eax, %edx
+; X86-NEXT: movl %edx, 12(%ecx)
+; X86-NEXT: movl %edi, 8(%ecx)
+; X86-NEXT: movl %ebx, 4(%ecx)
+; X86-NEXT: movl %ebp, (%ecx)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+ %tmp = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 2);
+ ret <4 x i32> %tmp;
+}
+
+; With a scale of 0, the following calls reduce to regular integer multiplication
+define i32 @func4(i32 %x, i32 %y) nounwind {
+; X64-LABEL: func4:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: imull %esi, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func4:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+ %tmp = call i32 @llvm.umul.fix.i32(i32 %x, i32 %y, i32 0);
+ ret i32 %tmp;
+}
+
+define i64 @func5(i64 %x, i64 %y) nounwind {
+; X64-LABEL: func5:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: imulq %rsi, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: func5:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+ %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 0);
+ ret i64 %tmp;
+}
+
+define i4 @func6(i4 %x, i4 %y) nounwind {
+; X64-LABEL: func6:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: andb $15, %al
+; X64-NEXT: andb $15, %sil
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: mulb %sil
+; X64-NEXT: retq
+;
+; X86-LABEL: func6:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: andb $15, %al
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: andb $15, %cl
+; X86-NEXT: mulb %cl
+; X86-NEXT: retl
+ %tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 0);
+ ret i4 %tmp;
+}
+
+define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X64-LABEL: vec2:
+; X64: # %bb.0:
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X64-NEXT: pmuludq %xmm1, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X64-NEXT: pmuludq %xmm2, %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: retq
+;
+; X86-LABEL: vec2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: imull {{[0-9]+}}(%esp), %edi
+; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
+; X86-NEXT: imull {{[0-9]+}}(%esp), %edx
+; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
+ %tmp = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0);
+ ret <4 x i32> %tmp;
+}
+
+define i64 @func7(i64 %x, i64 %y) nounwind {
+; X64-LABEL: func7:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: shrdq $32, %rdx, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: func7:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %edx, %ebx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 32);
+ ret i64 %tmp;
+}
+
+define i64 @func8(i64 %x, i64 %y) nounwind {
+; X64-LABEL: func8:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: shrdq $63, %rdx, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: func8:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: addl %ebx, %ecx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: addl %ebp, %edx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: shldl $1, %edx, %ebx
+; X86-NEXT: shrdl $31, %edx, %eax
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 63);
+ ret i64 %tmp;
+}
+
+define i64 @func9(i64 %x, i64 %y) nounwind {
+; X64-LABEL: func9:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: func9:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: addl %ebx, %ecx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: addl %edx, %ebp
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 64);
+ ret i64 %tmp;
+}
More information about the llvm-commits
mailing list