[llvm] 3fd5d1c - [X86][SSE] combineTargetShuffle - permilps(shufps(load(), x)) --> permilps(shufps(x, load()))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 24 07:25:36 PST 2020
Author: Simon Pilgrim
Date: 2020-01-24T15:23:20Z
New Revision: 3fd5d1c6e7db55209b3f03ce64ab2f25d9edea69
URL: https://github.com/llvm/llvm-project/commit/3fd5d1c6e7db55209b3f03ce64ab2f25d9edea69
DIFF: https://github.com/llvm/llvm-project/commit/3fd5d1c6e7db55209b3f03ce64ab2f25d9edea69.diff
LOG: [X86][SSE] combineTargetShuffle - permilps(shufps(load(),x)) --> permilps(shufps(x,load()))
Moves the operand-commutation code from lowerShuffleWithSHUFPS (added in rG30fcd29fe479) into combineTargetShuffle, so that these cases are also caught during combine.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/insertelement-duplicates.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f2e36cdb3437..0df66128b82b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13316,12 +13316,10 @@ static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// It makes no assumptions about whether this is the *best* lowering, it simply
/// uses it.
static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
- ArrayRef<int> OriginalMask, SDValue V1,
+ ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
SDValue LowV = V1, HighV = V2;
- SmallVector<int, 4> Mask(OriginalMask.begin(), OriginalMask.end());
- SmallVector<int, 4> NewMask = Mask;
-
+ SmallVector<int, 4> NewMask(Mask.begin(), Mask.end());
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 1) {
@@ -13358,14 +13356,6 @@ static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
NewMask[V2Index] = 0; // We shifted the V2 element into V2[0].
}
} else if (NumV2Elements == 2) {
- // If we are likely to fold V1 but not V2, then commute the shuffle.
- if (MayFoldLoad(V1) && !MayFoldLoad(V2)) {
- ShuffleVectorSDNode::commuteMask(Mask);
- NewMask = Mask;
- std::swap(V1, V2);
- std::swap(LowV, HighV);
- }
-
if (Mask[0] < 4 && Mask[1] < 4) {
// Handle the easy case where we have V1 in the low lanes and V2 in the
// high lanes.
@@ -34598,6 +34588,28 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
}
+ // Attempt to commute shufps LHS loads:
+ // permilps(shufps(load(),x)) --> permilps(shufps(x,load()))
+ if (VT == MVT::v4f32 &&
+ (X86ISD::VPERMILPI == Opcode ||
+ (X86ISD::SHUFP == Opcode && N.getOperand(0) == N.getOperand(1)))) {
+ SDValue N0 = N.getOperand(0);
+ unsigned Imm = N.getConstantOperandVal(X86ISD::VPERMILPI == Opcode ? 1 : 2);
+ if (N0.getOpcode() == X86ISD::SHUFP && N->isOnlyUserOf(N0.getNode())) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ if (MayFoldLoad(peekThroughOneUseBitcasts(N00)) &&
+ !MayFoldLoad(peekThroughOneUseBitcasts(N01))) {
+ unsigned Imm1 = N0.getConstantOperandVal(2);
+ Imm1 = ((Imm1 & 0x0F) << 4) | ((Imm1 & 0xF0) >> 4);
+ SDValue NewN0 = DAG.getNode(X86ISD::SHUFP, DL, VT, N01, N00,
+ DAG.getTargetConstant(Imm1, DL, MVT::i8));
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, NewN0, NewN0,
+ DAG.getTargetConstant(Imm ^ 0xAA, DL, MVT::i8));
+ }
+ }
+ }
+
switch (Opcode) {
case X86ISD::VBROADCAST: {
SDValue Src = N.getOperand(0);
diff --git a/llvm/test/CodeGen/X86/insertelement-duplicates.ll b/llvm/test/CodeGen/X86/insertelement-duplicates.ll
index 2f32c5a2e6b0..3f693728e6fb 100644
--- a/llvm/test/CodeGen/X86/insertelement-duplicates.ll
+++ b/llvm/test/CodeGen/X86/insertelement-duplicates.ll
@@ -9,22 +9,22 @@ define void @PR15298(<4 x float>* nocapture %source, <8 x float>* nocapture %des
; SSE-32: # %bb.0: # %L.entry
; SSE-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; SSE-32-NEXT: movaps 304(%ecx), %xmm0
+; SSE-32-NEXT: xorps %xmm0, %xmm0
; SSE-32-NEXT: xorps %xmm1, %xmm1
-; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
-; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
-; SSE-32-NEXT: movups %xmm1, 624(%eax)
-; SSE-32-NEXT: movups %xmm0, 608(%eax)
+; SSE-32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],mem[0,0]
+; SSE-32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
+; SSE-32-NEXT: movups %xmm0, 624(%eax)
+; SSE-32-NEXT: movups %xmm1, 608(%eax)
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: PR15298:
; SSE-64: # %bb.0: # %L.entry
-; SSE-64-NEXT: movaps 304(%rdi), %xmm0
+; SSE-64-NEXT: xorps %xmm0, %xmm0
; SSE-64-NEXT: xorps %xmm1, %xmm1
-; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
-; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
-; SSE-64-NEXT: movups %xmm1, 624(%rsi)
-; SSE-64-NEXT: movups %xmm0, 608(%rsi)
+; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],mem[0,0]
+; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
+; SSE-64-NEXT: movups %xmm0, 624(%rsi)
+; SSE-64-NEXT: movups %xmm1, 608(%rsi)
; SSE-64-NEXT: retq
;
; AVX-32-LABEL: PR15298:
More information about the llvm-commits mailing list