[llvm] r359491 - [X86][SSE] isHorizontalBinOp - add support for target shuffles
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 29 12:52:59 PDT 2019
Author: rksimon
Date: Mon Apr 29 12:52:59 2019
New Revision: 359491
URL: http://llvm.org/viewvc/llvm-project?rev=359491&view=rev
Log:
[X86][SSE] isHorizontalBinOp - add support for target shuffles
Add target shuffle decoding to isHorizontalBinOp, alongside the existing ISD::VECTOR_SHUFFLE support.
This means the match can now look through bitcasts, so the extracted operands must be bitcast back to the expected type.
Fixes PR39936 and should help with PR39920/PR39921.
Differential Revision: https://reviews.llvm.org/D61245
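As a rough illustration (not part of the patch itself), the kind of pattern this combine targets is an even/odd shuffle pair feeding a binop, as in the PR39936_v8i32 test updated below; the sketch here is adapted from that test, with the hypothetical function name @hadd_v8i32 chosen for clarity. With this change the match still fires once such shuffles have been lowered to target shuffle nodes, possibly behind bitcasts, so the whole sequence can become a single (v)phaddd/(v)haddps:

  ; Minimal sketch only, adapted from the PR39936_v8i32 test in this patch.
  define <8 x i32> @hadd_v8i32(<8 x i32> %a) {
    ; even lanes of %a
    %even = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
    ; odd lanes of %a
    %odd  = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
    ; add of even/odd lanes == horizontal add of adjacent pairs
    %sum  = add <8 x i32> %even, %odd
    ret <8 x i32> %sum
  }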
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/haddsub.ll
llvm/trunk/test/CodeGen/X86/phaddsub.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=359491&r1=359490&r2=359491&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Apr 29 12:52:59 2019
@@ -39248,51 +39248,65 @@ static bool isHorizontalBinOp(SDValue &L
// then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
// which is A horizontal-op B.
- // At least one of the operands should be a vector shuffle.
- if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
- RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
- return false;
-
MVT VT = LHS.getSimpleValueType();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
+ unsigned NumElts = VT.getVectorNumElements();
+
+ auto GetShuffle = [&](SDValue Op, SDValue &N0, SDValue &N1,
+ SmallVectorImpl<int> &ShuffleMask) {
+ if (Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ if (!Op.getOperand(0).isUndef())
+ N0 = Op.getOperand(0);
+ if (!Op.getOperand(1).isUndef())
+ N1 = Op.getOperand(1);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
+ ShuffleMask.append(Mask.begin(), Mask.end());
+ return;
+ }
+ bool IsUnary;
+ SmallVector<SDValue, 2> SrcOps;
+ SmallVector<int, 16> SrcShuffleMask;
+ SDValue BC = peekThroughBitcasts(Op);
+ if (isTargetShuffle(BC.getOpcode()) &&
+ getTargetShuffleMask(BC.getNode(), BC.getSimpleValueType(), false,
+ SrcOps, SrcShuffleMask, IsUnary) &&
+ SrcOps.size() <= 2 && SrcShuffleMask.size() == NumElts) {
+ N0 = SrcOps.size() > 0 ? SrcOps[0] : SDValue();
+ N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue();
+ ShuffleMask.append(SrcShuffleMask.begin(), SrcShuffleMask.end());
+ }
+ };
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
// If LHS is not a shuffle, then pretend it is the identity shuffle:
// LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
// NOTE: A default initialized SDValue represents an UNDEF of type VT.
- unsigned NumElts = VT.getVectorNumElements();
SDValue A, B;
- SmallVector<int, 16> LMask(NumElts);
- if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
- if (!LHS.getOperand(0).isUndef())
- A = LHS.getOperand(0);
- if (!LHS.getOperand(1).isUndef())
- B = LHS.getOperand(1);
- ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS)->getMask();
- llvm::copy(Mask, LMask.begin());
- } else {
- A = LHS;
- for (unsigned i = 0; i != NumElts; ++i)
- LMask[i] = i;
- }
+ SmallVector<int, 16> LMask;
+ GetShuffle(LHS, A, B, LMask);
// Likewise, view RHS in the form
// RHS = VECTOR_SHUFFLE C, D, RMask
SDValue C, D;
- SmallVector<int, 16> RMask(NumElts);
- if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
- if (!RHS.getOperand(0).isUndef())
- C = RHS.getOperand(0);
- if (!RHS.getOperand(1).isUndef())
- D = RHS.getOperand(1);
- ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS)->getMask();
- llvm::copy(Mask, RMask.begin());
- } else {
+ SmallVector<int, 16> RMask;
+ GetShuffle(RHS, C, D, RMask);
+
+ // At least one of the operands should be a vector shuffle.
+ if (LMask.empty() && RMask.empty())
+ return false;
+
+ if (LMask.empty()) {
+ A = LHS;
+ for (unsigned i = 0; i != NumElts; ++i)
+ LMask.push_back(i);
+ }
+
+ if (RMask.empty()) {
C = RHS;
for (unsigned i = 0; i != NumElts; ++i)
- RMask[i] = i;
+ RMask.push_back(i);
}
// If A and B occur in reverse order in RHS, then canonicalize by commuting
@@ -39359,7 +39373,8 @@ static SDValue combineFaddFsub(SDNode *N
(Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, IsFadd) &&
shouldUseHorizontalOp(LHS == RHS, DAG, Subtarget))
- return DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
+ return DAG.getNode(HorizOpcode, SDLoc(N), VT, DAG.getBitcast(VT, LHS),
+ DAG.getBitcast(VT, RHS));
return SDValue();
}
@@ -42261,6 +42276,8 @@ static SDValue combineAdd(SDNode *N, Sel
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops);
};
+ Op0 = DAG.getBitcast(VT, Op0);
+ Op1 = DAG.getBitcast(VT, Op1);
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1},
HADDBuilder);
}
@@ -42392,6 +42409,8 @@ static SDValue combineSub(SDNode *N, Sel
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops);
};
+ Op0 = DAG.getBitcast(VT, Op0);
+ Op1 = DAG.getBitcast(VT, Op1);
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1},
HSUBBuilder);
}
Modified: llvm/trunk/test/CodeGen/X86/haddsub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/haddsub.ll?rev=359491&r1=359490&r2=359491&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/haddsub.ll (original)
+++ llvm/trunk/test/CodeGen/X86/haddsub.ll Mon Apr 29 12:52:59 2019
@@ -1632,9 +1632,7 @@ define float @PR39936_v8f32(<8 x float>)
; AVX-SLOW-LABEL: PR39936_v8f32:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
-; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; AVX-SLOW-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; AVX-SLOW-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
@@ -1646,9 +1644,7 @@ define float @PR39936_v8f32(<8 x float>)
; AVX-FAST-LABEL: PR39936_v8f32:
; AVX-FAST: # %bb.0:
; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
-; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; AVX-FAST-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; AVX-FAST-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-FAST-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/phaddsub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/phaddsub.ll?rev=359491&r1=359490&r2=359491&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/phaddsub.ll (original)
+++ llvm/trunk/test/CodeGen/X86/phaddsub.ll Mon Apr 29 12:52:59 2019
@@ -803,32 +803,51 @@ define i32 @PR39936_v8i32(<8 x i32>) {
; SSSE3-FAST-NEXT: movd %xmm0, %eax
; SSSE3-FAST-NEXT: retq
;
-; AVX-SLOW-LABEL: PR39936_v8i32:
-; AVX-SLOW: # %bb.0:
-; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
-; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; AVX-SLOW-NEXT: vpaddd %xmm0, %xmm2, %xmm0
-; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
-; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; AVX-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; AVX-SLOW-NEXT: vmovd %xmm0, %eax
-; AVX-SLOW-NEXT: vzeroupper
-; AVX-SLOW-NEXT: retq
+; AVX1-SLOW-LABEL: PR39936_v8i32:
+; AVX1-SLOW: # %bb.0:
+; AVX1-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-SLOW-NEXT: vphaddd %xmm1, %xmm0, %xmm0
+; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; AVX1-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX1-SLOW-NEXT: vmovd %xmm0, %eax
+; AVX1-SLOW-NEXT: vzeroupper
+; AVX1-SLOW-NEXT: retq
;
-; AVX-FAST-LABEL: PR39936_v8i32:
-; AVX-FAST: # %bb.0:
-; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
-; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; AVX-FAST-NEXT: vpaddd %xmm0, %xmm2, %xmm0
-; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX-FAST-NEXT: vmovd %xmm0, %eax
-; AVX-FAST-NEXT: vzeroupper
-; AVX-FAST-NEXT: retq
+; AVX1-FAST-LABEL: PR39936_v8i32:
+; AVX1-FAST: # %bb.0:
+; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-FAST-NEXT: vphaddd %xmm1, %xmm0, %xmm0
+; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
+; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
+; AVX1-FAST-NEXT: vmovd %xmm0, %eax
+; AVX1-FAST-NEXT: vzeroupper
+; AVX1-FAST-NEXT: retq
+;
+; AVX2-SLOW-LABEL: PR39936_v8i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-SLOW-NEXT: vphaddd %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; AVX2-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX2-SLOW-NEXT: vmovd %xmm0, %eax
+; AVX2-SLOW-NEXT: vzeroupper
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: PR39936_v8i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-FAST-NEXT: vphaddd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vmovd %xmm0, %eax
+; AVX2-FAST-NEXT: vzeroupper
+; AVX2-FAST-NEXT: retq
%2 = shufflevector <8 x i32> %0, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
%3 = shufflevector <8 x i32> %0, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%4 = add <8 x i32> %2, %3
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=359491&r1=359490&r2=359491&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Mon Apr 29 12:52:59 2019
@@ -1732,9 +1732,7 @@ define <4 x double> @add_v4f64_0246_1357
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
-; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
-; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: vhaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: add_v4f64_0246_1357:
@@ -1775,9 +1773,7 @@ define <4 x double> @add_v4f64_4602_5713
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
-; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
-; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: vhaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: add_v4f64_4602_5713:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll?rev=359491&r1=359490&r2=359491&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll Mon Apr 29 12:52:59 2019
@@ -2805,9 +2805,7 @@ define <8 x float> @add_v8f32_02468ACE_1
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
-; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm2[1,3],ymm0[5,7],ymm2[5,7]
-; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: vhaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: add_v8f32_02468ACE_13579BDF:
@@ -2848,9 +2846,7 @@ define <8 x float> @add_v8f32_8ACE0246_9
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
-; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm2[1,3],ymm0[5,7],ymm2[5,7]
-; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: vhaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: add_v8f32_8ACE0246_9BDF1357: