[llvm-commits] [llvm] r154396 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx-shuffle.ll test/CodeGen/X86/vec_shuffle-20.ll
David Blaikie
dblaikie at gmail.com
Tue Apr 10 08:24:34 PDT 2012
On Tue, Apr 10, 2012 at 7:33 AM, Nadav Rotem <nadav.rotem at intel.com> wrote:
> Author: nadav
> Date: Tue Apr 10 09:33:13 2012
> New Revision: 154396
>
> URL: http://llvm.org/viewvc/llvm-project?rev=154396&view=rev
> Log:
> Modify the code that lowers shuffles to blends from using blendvXX to vblendXX.
> blendv uses a register for the selection while vblend uses an immediate.
> On sandybridge they still have the same latency and execute on the same execution ports.
>
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.h
> llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> llvm/trunk/lib/Target/X86/X86InstrSSE.td
> llvm/trunk/test/CodeGen/X86/avx-shuffle.ll
> llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154396&r1=154395&r2=154396&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Apr 10 09:33:13 2012
> @@ -5391,59 +5391,76 @@
> SDValue V1 = SVOp->getOperand(0);
> SDValue V2 = SVOp->getOperand(1);
> DebugLoc dl = SVOp->getDebugLoc();
> - LLVMContext *Context = DAG.getContext();
> EVT VT = Op.getValueType();
> EVT InVT = V1.getValueType();
> EVT EltVT = VT.getVectorElementType();
This variable (EltVT) became unused with your change, causing the Clang
self-host build to warn. I removed the variable to fix this in r154398.
- David
> - unsigned EltSize = EltVT.getSizeInBits();
> int MaskSize = VT.getVectorNumElements();
> int InSize = InVT.getVectorNumElements();
>
> - // TODO: At the moment we only use AVX blends. We could also use SSE4 blends.
> - if (!Subtarget->hasAVX())
> + if (!Subtarget->hasSSE41())
> return SDValue();
>
> if (MaskSize != InSize)
> return SDValue();
>
> - SmallVector<Constant*,2> MaskVals;
> - ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0));
> - ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1));
> + int ISDNo = 0;
> + MVT OpTy;
> +
> + switch (VT.getSimpleVT().SimpleTy) {
> + default: return SDValue();
> + case MVT::v8i16:
> + ISDNo = X86ISD::BLENDPW;
> + OpTy = MVT::v8i16;
> + break;
> + case MVT::v4i32:
> + case MVT::v4f32:
> + ISDNo = X86ISD::BLENDPS;
> + OpTy = MVT::v4f32;
> + break;
> + case MVT::v2i64:
> + case MVT::v2f64:
> + ISDNo = X86ISD::BLENDPD;
> + OpTy = MVT::v2f64;
> + break;
> + case MVT::v8i32:
> + case MVT::v8f32:
> + if (!Subtarget->hasAVX())
> + return SDValue();
> + ISDNo = X86ISD::BLENDPS;
> + OpTy = MVT::v8f32;
> + break;
> + case MVT::v4i64:
> + case MVT::v4f64:
> + if (!Subtarget->hasAVX())
> + return SDValue();
> + ISDNo = X86ISD::BLENDPD;
> + OpTy = MVT::v4f64;
> + break;
> + case MVT::v16i16:
> + if (!Subtarget->hasAVX2())
> + return SDValue();
> + ISDNo = X86ISD::BLENDPW;
> + OpTy = MVT::v16i16;
> + break;
> + }
> + assert(ISDNo && "Invalid Op Number");
> +
> + unsigned MaskVals = 0;
>
> for (int i = 0; i < MaskSize; ++i) {
> int EltIdx = SVOp->getMaskElt(i);
> if (EltIdx == i || EltIdx == -1)
> - MaskVals.push_back(NegOne);
> + MaskVals |= (1<<i);
> else if (EltIdx == (i + MaskSize))
> - MaskVals.push_back(Zero);
> + continue; // Bit is set to zero;
> else return SDValue();
> }
>
> - Constant *MaskC = ConstantVector::get(MaskVals);
> - EVT MaskTy = EVT::getEVT(MaskC->getType());
> - assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size");
> - SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy);
> - unsigned Alignment = cast<ConstantPoolSDNode>(MaskIdx)->getAlignment();
> - SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx,
> - MachinePointerInfo::getConstantPool(),
> - false, false, false, Alignment);
> -
> - if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8)
> - return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
> -
> - if (Subtarget->hasAVX()) {
> - switch (MaskTy.getSimpleVT().SimpleTy) {
> - default: return SDValue();
> - case MVT::v16i8:
> - case MVT::v4i32:
> - case MVT::v2i64:
> - case MVT::v8i32:
> - case MVT::v4i64:
> - return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
> - }
> - }
> -
> - return SDValue();
> + V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1);
> + V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2);
> + SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2,
> + DAG.getConstant(MaskVals, MVT::i32));
> + return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
> }
>
> // v8i16 shuffles - Prefer shuffles in the following order:
> @@ -11050,6 +11067,9 @@
> case X86ISD::ANDNP: return "X86ISD::ANDNP";
> case X86ISD::PSIGN: return "X86ISD::PSIGN";
> case X86ISD::BLENDV: return "X86ISD::BLENDV";
> + case X86ISD::BLENDPW: return "X86ISD::BLENDPW";
> + case X86ISD::BLENDPS: return "X86ISD::BLENDPS";
> + case X86ISD::BLENDPD: return "X86ISD::BLENDPD";
> case X86ISD::HADD: return "X86ISD::HADD";
> case X86ISD::HSUB: return "X86ISD::HSUB";
> case X86ISD::FHADD: return "X86ISD::FHADD";
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=154396&r1=154395&r2=154396&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Apr 10 09:33:13 2012
> @@ -175,9 +175,14 @@
> /// PSIGN - Copy integer sign.
> PSIGN,
>
> - /// BLEND family of opcodes
> + /// BLENDV - Blend where the selector is an XMM.
> BLENDV,
>
> + /// BLENDxx - Blend where the selector is an immediate.
> + BLENDPW,
> + BLENDPS,
> + BLENDPD,
> +
> /// HADD - Integer horizontal add.
> HADD,
>
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=154396&r1=154395&r2=154396&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Apr 10 09:33:13 2012
> @@ -126,6 +126,8 @@
> SDTCisSameAs<0,2>, SDTCisInt<3>]>;
>
> def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
> +def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
> +SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
>
> def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
>
> @@ -158,6 +160,10 @@
>
> def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
>
> +def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
> +def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
> +def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
> +
> //===----------------------------------------------------------------------===//
> // SSE Complex Patterns
> //===----------------------------------------------------------------------===//
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=154396&r1=154395&r2=154396&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Apr 10 09:33:13 2012
> @@ -6735,12 +6735,22 @@
> def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
> (v4f64 VR256:$src2))),
> (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
> +
> + def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
> + (imm:$mask))),
> + (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
> + def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
> + (imm:$mask))),
> + (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
> }
>
> let Predicates = [HasAVX2] in {
> def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
> (v32i8 VR256:$src2))),
> (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
> + def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
> + (imm:$mask))),
> + (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
> }
>
> /// SS41I_ternary_int - SSE 4.1 ternary operator
> @@ -6789,6 +6799,17 @@
> def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
> (v2f64 VR128:$src2))),
> (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
> +
> + def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
> + (imm:$mask))),
> + (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
> + def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
> + (imm:$mask))),
> + (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
> + def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
> + (imm:$mask))),
> + (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
> +
> }
>
> let Predicates = [HasAVX] in
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=154396&r1=154395&r2=154396&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx-shuffle.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Tue Apr 10 09:33:13 2012
> @@ -164,7 +164,7 @@
> }
>
> ; CHECK: blend1
> -; CHECK: vblendvps
> +; CHECK: vblendps
> ; CHECK: ret
> define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
> %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
> @@ -172,7 +172,7 @@
> }
>
> ; CHECK: blend2
> -; CHECK: vblendvps
> +; CHECK: vblendps
> ; CHECK: ret
> define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
> %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
> @@ -180,7 +180,7 @@
> }
>
> ; CHECK: blend2a
> -; CHECK: vblendvps
> +; CHECK: vblendps
> ; CHECK: ret
> define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
> %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
> @@ -188,7 +188,7 @@
> }
>
> ; CHECK: blend3
> -; CHECK-NOT: vblendvps
> +; CHECK-NOT: vblendps
> ; CHECK: ret
> define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
> %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
> @@ -196,7 +196,7 @@
> }
>
> ; CHECK: blend4
> -; CHECK: vblendvpd
> +; CHECK: vblendpd
> ; CHECK: ret
> define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
> %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
>
> Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll?rev=154396&r1=154395&r2=154396&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll Tue Apr 10 09:33:13 2012
> @@ -1,4 +1,4 @@
> -; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
> +; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2
>
> define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind {
> entry:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list