[llvm] r368308 - [X86] XFormVExtractWithShuffleIntoLoad - handle shuffle mask scaling
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 8 09:05:23 PDT 2019
Author: rksimon
Date: Thu Aug 8 09:05:23 2019
New Revision: 368308
URL: http://llvm.org/viewvc/llvm-project?rev=368308&view=rev
Log:
[X86] XFormVExtractWithShuffleIntoLoad - handle shuffle mask scaling
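If the target shuffle mask comes from a wider type (i.e. the shuffle has fewer, wider elements than the vector being extracted from), scale the mask up to the extraction's element count so that the extraction can still peek through the shuffle.
Fixes the regression mentioned in rL368307.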
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/insertps-combine.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=368308&r1=368307&r2=368308&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Aug 8 09:05:23 2019
@@ -34757,8 +34757,9 @@ SDValue X86TargetLowering::SimplifyMulti
/// folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
/// shuffles have been custom lowered so we need to handle those here.
-static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+static SDValue
+XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -34770,13 +34771,17 @@ static SDValue XFormVExtractWithShuffleI
return SDValue();
EVT OriginalVT = InVec.getValueType();
+ unsigned NumOriginalElts = OriginalVT.getVectorNumElements();
// Peek through bitcasts, don't duplicate a load with other uses.
InVec = peekThroughOneUseBitcasts(InVec);
EVT CurrentVT = InVec.getValueType();
- if (!CurrentVT.isVector() ||
- CurrentVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
+ if (!CurrentVT.isVector())
+ return SDValue();
+
+ unsigned NumCurrentElts = CurrentVT.getVectorNumElements();
+ if ((NumOriginalElts % NumCurrentElts) != 0)
return SDValue();
if (!isTargetShuffle(InVec.getOpcode()))
@@ -34793,10 +34798,17 @@ static SDValue XFormVExtractWithShuffleI
ShuffleOps, ShuffleMask, UnaryShuffle))
return SDValue();
+ unsigned Scale = NumOriginalElts / NumCurrentElts;
+ if (Scale > 1) {
+ SmallVector<int, 16> ScaledMask;
+ scaleShuffleMask<int>(Scale, ShuffleMask, ScaledMask);
+ ShuffleMask = std::move(ScaledMask);
+ }
+ assert(ShuffleMask.size() == NumOriginalElts && "Shuffle mask size mismatch");
+
// Select the input vector, guarding against out of range extract vector.
- unsigned NumElems = CurrentVT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt];
+ int Idx = (Elt > (int)NumOriginalElts) ? SM_SentinelUndef : ShuffleMask[Elt];
if (Idx == SM_SentinelZero)
return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT)
@@ -34809,8 +34821,9 @@ static SDValue XFormVExtractWithShuffleI
if (llvm::any_of(ShuffleMask, [](int M) { return M == SM_SentinelZero; }))
return SDValue();
- assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
- SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0] : ShuffleOps[1];
+ assert(0 <= Idx && Idx < (int)(2 * NumOriginalElts) &&
+ "Shuffle index out of range");
+ SDValue LdNode = (Idx < (int)NumOriginalElts) ? ShuffleOps[0] : ShuffleOps[1];
// If inputs to shuffle are the same for both ops, then allow 2 uses
unsigned AllowedUses =
@@ -34830,7 +34843,7 @@ static SDValue XFormVExtractWithShuffleI
LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
- if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
+ if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
return SDValue();
// If there's a bitcast before the shuffle, check if the load type and
@@ -34848,10 +34861,11 @@ static SDValue XFormVExtractWithShuffleI
SDLoc dl(N);
// Create shuffle node taking into account the case that its a unary shuffle
- SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) : ShuffleOps[1];
- Shuffle = DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle,
- ShuffleMask);
- Shuffle = DAG.getBitcast(OriginalVT, Shuffle);
+ SDValue Shuffle = UnaryShuffle ? DAG.getUNDEF(OriginalVT)
+ : DAG.getBitcast(OriginalVT, ShuffleOps[1]);
+ Shuffle = DAG.getVectorShuffle(OriginalVT, dl,
+ DAG.getBitcast(OriginalVT, ShuffleOps[0]),
+ Shuffle, ShuffleMask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
EltNo);
}
Modified: llvm/trunk/test/CodeGen/X86/insertps-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insertps-combine.ll?rev=368308&r1=368307&r2=368308&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insertps-combine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insertps-combine.ll Thu Aug 8 09:05:23 2019
@@ -285,13 +285,12 @@ define float @extract_lane_insertps_5123
define float @extract_lane_insertps_6123(<4 x float> %a0, <4 x float> *%p1) {
; SSE-LABEL: extract_lane_insertps_6123:
; SSE: # %bb.0:
-; SSE-NEXT: movaps (%rdi), %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: extract_lane_insertps_6123:
; AVX: # %bb.0:
-; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: retq
%a1 = load <4 x float>, <4 x float> *%p1
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 128)
More information about the llvm-commits
mailing list