[llvm] r207318 - X86: Lower SMUL_LOHI of v4i32 to pmuldq when SSE4.1 is available.
Robert Khasanov
rob.khasanov at gmail.com
Tue Jul 8 09:32:23 PDT 2014
Hi, Benjamin
Could you take a look at bug http://llvm.org/bugs/show_bug.cgi?id=20118?
The bug has been reproducible since this commit.
Regards,
Robert
2014-04-26 18:12 GMT+04:00 Benjamin Kramer <benny.kra at googlemail.com>:
> Author: d0k
> Date: Sat Apr 26 09:12:19 2014
> New Revision: 207318
>
> URL: http://llvm.org/viewvc/llvm-project?rev=207318&view=rev
> Log:
> X86: Lower SMUL_LOHI of v4i32 to pmuldq when SSE4.1 is available.
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.h
> llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> llvm/trunk/lib/Target/X86/X86InstrSSE.td
> llvm/trunk/test/CodeGen/X86/vector-idiv.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=207318&r1=207317&r2=207318&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Apr 26 09:12:19 2014
> @@ -1062,6 +1062,7 @@ void X86TargetLowering::resetOperationAc
>
> // FIXME: Do we need to handle scalar-to-vector here?
> setOperationAction(ISD::MUL, MVT::v4i32, Legal);
> + setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
>
> setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
> setOperationAction(ISD::VSELECT, MVT::v2i64, Legal);
> @@ -1227,6 +1228,7 @@ void X86TargetLowering::resetOperationAc
> // Don't lower v32i8 because there is no 128-bit byte mul
>
> setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
> + setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
> setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
> setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
>
> @@ -11729,6 +11731,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
> return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
> Op.getOperand(1), Op.getOperand(2));
>
> + case Intrinsic::x86_sse41_pmuldq:
> + case Intrinsic::x86_avx2_pmul_dq:
> + return DAG.getNode(X86ISD::PMULDQ, dl, Op.getValueType(),
> + Op.getOperand(1), Op.getOperand(2));
> +
> case Intrinsic::x86_sse2_pmulhu_w:
> case Intrinsic::x86_avx2_pmulhu_w:
> return DAG.getNode(ISD::MULHU, dl, Op.getValueType(),
> @@ -13168,8 +13175,8 @@ static SDValue LowerMUL(SDValue Op, cons
> return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
> }
>
> -static SDValue LowerUMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
> - SelectionDAG &DAG) {
> +static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
> + SelectionDAG &DAG) {
> SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
> EVT VT = Op0.getValueType();
> SDLoc dl(Op);
> @@ -13185,15 +13192,17 @@ static SDValue LowerUMUL_LOHI(SDValue Op
> // Emit two multiplies, one for the lower 2 ints and one for the higher
> 2
> // ints.
> MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
> + unsigned Opcode =
> + Op->getOpcode() == ISD::UMUL_LOHI ? X86ISD::PMULUDQ :
> X86ISD::PMULDQ;
> SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT,
> - DAG.getNode(X86ISD::PMULUDQ, dl, MulVT, Op0,
> Op1));
> + DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
> SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT,
> - DAG.getNode(X86ISD::PMULUDQ, dl, MulVT, Hi0,
> Hi1));
> + DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1));
>
> // Shuffle it back into the right order.
> - const int HighMask[] = {1, 3, 5, 7, 9, 11, 13, 15};
> + const int HighMask[] = {1, 5, 3, 7, 9, 13, 11, 15};
> SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
> - const int LowMask[] = {0, 2, 4, 6, 8, 10, 12, 14};
> + const int LowMask[] = {0, 4, 2, 6, 8, 12, 10, 14};
> SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
>
> return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Highs,
> Lows);
> @@ -14188,7 +14197,8 @@ SDValue X86TargetLowering::LowerOperatio
> case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
> case ISD::CTTZ: return LowerCTTZ(Op, DAG);
> case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
> - case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, Subtarget, DAG);
> + case ISD::UMUL_LOHI:
> + case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
> case ISD::SRA:
> case ISD::SRL:
> case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=207318&r1=207317&r2=207318&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat Apr 26 09:12:19 2014
> @@ -347,6 +347,8 @@ namespace llvm {
>
> // PMULUDQ - Vector multiply packed unsigned doubleword integers
> PMULUDQ,
> + // PMULDQ - Vector multiply packed signed doubleword integers
> + PMULDQ,
>
> // FMA nodes
> FMADD,
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=207318&r1=207317&r2=207318&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sat Apr 26 09:12:19
> 2014
> @@ -175,6 +175,9 @@ def X86select : SDNode<"X86ISD::SELECT"
> def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
> SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
> SDTCisSameAs<1,2>]>>;
> +def X86pmuldq : SDNode<"X86ISD::PMULDQ",
> + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
> + SDTCisSameAs<1,2>]>>;
>
> // Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always
> get
> // translated into one of the target nodes below during lowering.
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=207318&r1=207317&r2=207318&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sat Apr 26 09:12:19 2014
> @@ -7003,6 +7003,31 @@ multiclass SS48I_binop_rm<bits<8> opc, s
> Sched<[itins.Sched.Folded, ReadAfterLd]>;
> }
>
> +/// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and
> dst
> +/// types.
> +multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
> + ValueType DstVT, ValueType SrcVT, RegisterClass
> RC,
> + PatFrag memop_frag, X86MemOperand x86memop,
> + OpndItins itins,
> + bit IsCommutable = 0, bit Is2Addr = 1> {
> + let isCommutable = IsCommutable in
> + def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
> + (ins RC:$src1, RC:$src2),
> + !if(Is2Addr,
> + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
> + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1,
> $src2}")),
> + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
> + Sched<[itins.Sched]>;
> + def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
> + (ins RC:$src1, x86memop:$src2),
> + !if(Is2Addr,
> + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
> + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1,
> $src2}")),
> + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
> + (bitconvert (memop_frag
> addr:$src2)))))]>,
> + Sched<[itins.Sched.Folded, ReadAfterLd]>;
> +}
> +
> let Predicates = [HasAVX] in {
> let isCommutable = 0 in
> defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw",
> int_x86_sse41_packusdw,
> @@ -7031,8 +7056,9 @@ let Predicates = [HasAVX] in {
> defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
> loadv2i64, i128mem, 0,
> SSE_INTALU_ITINS_P>,
> VEX_4V;
> - defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq",
> int_x86_sse41_pmuldq,
> - 0, DEFAULT_ITINS_VECIMULSCHED>,
> VEX_4V;
> + defm VPMULDQ : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64,
> v4i32,
> + VR128, loadv2i64, i128mem,
> + SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
> }
>
> let Predicates = [HasAVX2] in {
> @@ -7064,9 +7090,9 @@ let Predicates = [HasAVX2] in {
> defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
> loadv4i64, i256mem, 0,
> SSE_INTALU_ITINS_P>,
> VEX_4V, VEX_L;
> - defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
> - int_x86_avx2_pmul_dq,
> WriteVecIMul>,
> - VEX_4V, VEX_L;
> + defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64,
> v8i32,
> + VR256, loadv4i64, i256mem,
> + SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V,
> VEX_L;
> }
>
> let Constraints = "$src1 = $dst" in {
> @@ -7089,8 +7115,9 @@ let Constraints = "$src1 = $dst" in {
> memopv2i64, i128mem, 1,
> SSE_INTALU_ITINS_P>;
> defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
> memopv2i64, i128mem, 1,
> SSE_INTALU_ITINS_P>;
> - defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq",
> int_x86_sse41_pmuldq,
> - 1, SSE_INTMUL_ITINS_P>;
> + defm PMULDQ : SS48I_binop_rm2<0x28, "pmuldq", X86pmuldq, v2i64, v4i32,
> + VR128, memopv2i64, i128mem,
> + SSE_INTMUL_ITINS_P, 1>;
> }
>
> let Predicates = [HasAVX] in {
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-idiv.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv.ll?rev=207318&r1=207317&r2=207318&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-idiv.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-idiv.ll Sat Apr 26 09:12:19 2014
> @@ -1,4 +1,4 @@
> -; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE
> +; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
> -check-prefix=SSE
> ; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s
> -check-prefix=AVX
>
> define <4 x i32> @test1(<4 x i32> %a) {
> @@ -103,4 +103,51 @@ define <16 x i16> @test6(<16 x i16> %a)
> ; AVX-NOT: vpmulhw
> }
>
> -; TODO: sdiv -> pmuldq
> +define <16 x i8> @test7(<16 x i8> %a) {
> + %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8
> 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
> + ret <16 x i8> %div
> +}
> +
> +define <4 x i32> @test8(<4 x i32> %a) {
> + %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
> + ret <4 x i32> %div
> +
> +; SSE-LABEL: test8:
> +; SSE: pmuldq
> +; SSE: pshufd $57
> +; SSE: pmuldq
> +; SSE: shufps $-35
> +; SSE: pshufd $-40
> +; SSE: padd
> +; SSE: psrld $31
> +; SSE: psrad $2
> +; SSE: padd
> +
> +; AVX-LABEL: test8:
> +; AVX: vpmuldq
> +; AVX: vpshufd $57
> +; AVX: vpmuldq
> +; AVX: vshufps $-35
> +; AVX: vpshufd $-40
> +; AVX: vpadd
> +; AVX: vpsrld $31
> +; AVX: vpsrad $2
> +; AVX: vpadd
> +}
> +
> +define <8 x i32> @test9(<8 x i32> %a) {
> + %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32
> 7, i32 7>
> + ret <8 x i32> %div
> +
> +; AVX-LABEL: test9:
> +; AVX: vpermd
> +; AVX: vpmuldq
> +; AVX: vshufps $-35
> +; AVX: vpmuldq
> +; AVX: vshufps $-35
> +; AVX: vpshufd $-40
> +; AVX: vpadd
> +; AVX: vpsrld $31
> +; AVX: vpsrad $2
> +; AVX: vpadd
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140708/f71397df/attachment.html>
More information about the llvm-commits
mailing list