[llvm] r352634 - [X86][AVX] Prefer to combine shuffle to broadcasts whenever possible
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 30 08:19:19 PST 2019
Author: rksimon
Date: Wed Jan 30 08:19:19 2019
New Revision: 352634
URL: http://llvm.org/viewvc/llvm-project?rev=352634&view=rev
Log:
[X86][AVX] Prefer to combine shuffle to broadcasts whenever possible
This is the first step towards improving broadcast support on AVX1 targets.
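
In effect, the broadcast-from-vector match moves out of matchUnaryShuffle and into combineX86ShuffleChain, ahead of the other unary matches, so a splat shuffle is combined to X86ISD::VBROADCAST before anything else can claim it. That also makes the old !Subtarget.hasAVX2() guard on the MOVDDUP match redundant, so it is dropped. The match itself boils down to a mask-equivalence test: every defined mask lane must read element 0 of the source. Below is a minimal standalone C++ sketch of that test, using plain std::vector in place of LLVM's ArrayRef/SmallVector; the helper name isBroadcastMask is hypothetical, not part of the patch.

#include <vector>

// Undef shuffle-mask lanes are conventionally encoded as -1 (LLVM's
// SM_SentinelUndef); isTargetShuffleEquivalent lets them match any
// expected index, so this sketch does the same.
constexpr int SentinelUndef = -1;

// True if Mask is equivalent to the broadcast mask {0, 0, ..., 0},
// i.e. every defined lane reads element 0 of the source vector.
static bool isBroadcastMask(const std::vector<int> &Mask) {
  for (int M : Mask)
    if (M != SentinelUndef && M != 0)
      return false;
  return true;
}

int main() {
  // {0, -1, 0, 0} splats lane 0 with one undef lane, so a shuffle
  // chain producing it can collapse to a single vbroadcastss.
  return isBroadcastMask({0, SentinelUndef, 0, 0}) ? 0 : 1;
}

The sse3-avx-addsub-2.ll diff below shows the effect: the shared AVX checks split into AVX1 and AVX512 blocks, and the AVX512 path now emits vbroadcastss where it previously used vmovsldup (the AVX1 path keeps vmovsldup, since register-source VBROADCAST needs AVX2, hence the TODO).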
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=352634&r1=352633&r2=352634&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 30 08:19:19 2019
@@ -30503,7 +30503,7 @@ static bool matchUnaryShuffle(MVT MaskVT
   // instructions are no slower than UNPCKLPD but has the option to
   // fold the input operand into even an unaligned memory load.
   if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
-    if (!Subtarget.hasAVX2() && isTargetShuffleEquivalent(Mask, {0, 0})) {
+    if (isTargetShuffleEquivalent(Mask, {0, 0})) {
       Shuffle = X86ISD::MOVDDUP;
       SrcVT = DstVT = MVT::v2f64;
       return true;
@@ -30561,16 +30561,6 @@ static bool matchUnaryShuffle(MVT MaskVT
     }
   }

-  // Attempt to match against broadcast-from-vector.
-  if (Subtarget.hasAVX2()) {
-    SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
-    if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
-      SrcVT = DstVT = MaskVT;
-      Shuffle = X86ISD::VBROADCAST;
-      return true;
-    }
-  }
-
   return false;
 }
@@ -31050,6 +31040,19 @@ static SDValue combineX86ShuffleChain(Ar
     }
   }

+  // Attempt to match against broadcast-from-vector.
+  // TODO: Add (partial) AVX1 support.
+  if (Subtarget.hasAVX2() && (!IsEVEXShuffle || NumRootElts == NumMaskElts)) {
+    SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
+    if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
+      if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+        return SDValue(); // Nothing to do!
+      Res = DAG.getBitcast(MaskVT, V1);
+      Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
+      return DAG.getBitcast(RootVT, Res);
+    }
+  }
+
   SDValue NewV1 = V1; // Save operand in case early exit happens.
   if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
                         DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
Modified: llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll?rev=352634&r1=352633&r2=352634&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-avx-addsub-2.ll Wed Jan 30 08:19:19 2019
@@ -403,17 +403,29 @@ define <4 x float> @test15(<4 x float> %
 ; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: test15:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm2
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
-; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovsldup {{.*#+}} xmm1 = xmm2[0,0,2,2]
-; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX-NEXT:    retq
+; AVX1-LABEL: test15:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX1-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; AVX1-NEXT:    vaddss %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; AVX1-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovsldup {{.*#+}} xmm1 = xmm2[0,0,2,2]
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test15:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX512-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; AVX512-NEXT:    vaddss %xmm3, %xmm2, %xmm2
+; AVX512-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX512-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vbroadcastss %xmm2, %xmm1
+; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512-NEXT:    retq
   %1 = extractelement <4 x float> %A, i32 1
   %2 = extractelement <4 x float> %B, i32 1
   %add = fadd float %1, %2