[llvm] r257046 - [X86][SSE} Add INSERTPS as a target shuffle
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 7 02:24:19 PST 2016
Author: rksimon
Date: Thu Jan 7 04:24:19 2016
New Revision: 257046
URL: http://llvm.org/viewvc/llvm-project?rev=257046&view=rev
Log:
[X86][SSE} Add INSERTPS as a target shuffle
Follow up to D15378, added INSERTPS to the list of decodable target shuffles and enabled XFormVExtractWithShuffleIntoLoad to handle target shuffles with SentinelZero and tested this with INSERTPS.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/insertps-combine.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=257046&r1=257045&r2=257046&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jan 7 04:24:19 2016
@@ -3907,6 +3907,7 @@ static bool isTargetShuffle(unsigned Opc
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
+ case X86ISD::INSERTPS:
case X86ISD::PALIGNR:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
@@ -4760,6 +4761,11 @@ static bool getTargetShuffleMask(SDNode
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
+ case X86ISD::INSERTPS:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
+ break;
case X86ISD::UNPCKH:
DecodeUNPCKHMask(VT, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -23860,6 +23866,7 @@ static SDValue XFormVExtractWithShuffleI
SDValue InVec = N->getOperand(0);
SDValue EltNo = N->getOperand(1);
+ EVT EltVT = N->getValueType(0);
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
@@ -23888,14 +23895,22 @@ static SDValue XFormVExtractWithShuffleI
SmallVector<int, 16> ShuffleMask;
bool UnaryShuffle;
- if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(),
- false, ShuffleMask, UnaryShuffle))
+ if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true,
+ ShuffleMask, UnaryShuffle))
return SDValue();
// Select the input vector, guarding against out of range extract vector.
unsigned NumElems = CurrentVT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt];
+
+ if (Idx == SM_SentinelZero)
+ return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT)
+ : DAG.getConstantFP(+0.0, SDLoc(N), EltVT);
+ if (Idx == SM_SentinelUndef)
+ return DAG.getUNDEF(EltVT);
+
+ assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
: InVec.getOperand(1);
@@ -23920,7 +23935,6 @@ static SDValue XFormVExtractWithShuffleI
if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
return SDValue();
- EVT EltVT = N->getValueType(0);
// If there's a bitcast before the shuffle, check if the load type and
// alignment is valid.
unsigned Align = LN0->getAlignment();
Modified: llvm/trunk/test/CodeGen/X86/insertps-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insertps-combine.ll?rev=257046&r1=257045&r2=257046&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insertps-combine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insertps-combine.ll Thu Jan 7 04:24:19 2016
@@ -109,3 +109,36 @@ define <4 x float> @shuffle_v4f32_0z6z(<
%vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
ret <4 x float> %vecinit4
}
+
+define float @extract_zero_insertps_z0z7(<4 x float> %a0, <4 x float> %a1) {
+; SSE-LABEL: extract_zero_insertps_z0z7:
+; SSE: # BB#0:
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: extract_zero_insertps_z0z7:
+; AVX: # BB#0:
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 21)
+ %ext = extractelement <4 x float> %res, i32 0
+ ret float %ext
+}
+
+define float @extract_lane_insertps_5123(<4 x float> %a0, <4 x float> *%p1) {
+; SSE-LABEL: extract_lane_insertps_5123:
+; SSE: # BB#0:
+; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT: retq
+;
+; AVX-LABEL: extract_lane_insertps_5123:
+; AVX: # BB#0:
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: retq
+ %a1 = load <4 x float>, <4 x float> *%p1
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 128)
+ %ext = extractelement <4 x float> %res, i32 0
+ ret float %ext
+}
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
More information about the llvm-commits
mailing list