[llvm] r258000 - [X86][AVX] Enable extraction of upper 128-bit subvectors for 'half undef' shuffle lowering
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 16 14:30:20 PST 2016
Author: rksimon
Date: Sat Jan 16 16:30:20 2016
New Revision: 258000
URL: http://llvm.org/viewvc/llvm-project?rev=258000&view=rev
Log:
[X86][AVX] Enable extraction of upper 128-bit subvectors for 'half undef' shuffle lowering
Added support for the extraction of the upper 128-bit subvectors for lower/upper half undef shuffles if it would reduce the number of extractions/insertions or avoid loads of AVX2 permps/permd shuffle masks.
Minor follow-up to D15477.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll
llvm/trunk/test/CodeGen/X86/avx-shuffle-x86_32.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=258000&r1=257999&r2=258000&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 16 16:30:20 2016
@@ -10425,7 +10425,7 @@ static SDValue lowerVectorShuffleWithUnd
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- assert(VT.getSizeInBits() == 256 && "Expected 256-bit vector");
+ assert(VT.is256BitVector() && "Expected 256-bit vector");
unsigned NumElts = VT.getVectorNumElements();
unsigned HalfNumElts = NumElts / 2;
@@ -10456,11 +10456,6 @@ static SDValue lowerVectorShuffleWithUnd
DAG.getIntPtrConstant(HalfNumElts, DL));
}
- // AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
- if (UndefLower && Subtarget->hasAVX2() &&
- (VT == MVT::v4f64 || VT == MVT::v4i64))
- return SDValue();
-
// If the shuffle only uses the lower halves of the input operands,
// then extract them and perform the 'half' shuffle at half width.
// e.g. vector_shuffle <X, X, X, X, u, u, u, u> or <X, X, u, u>
@@ -10478,11 +10473,6 @@ static SDValue lowerVectorShuffleWithUnd
// i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2.
int HalfIdx = M / HalfNumElts;
- // Only shuffle using the lower halves of the inputs.
- // TODO: Investigate usefulness of shuffling with upper halves.
- if (HalfIdx != 0 && HalfIdx != 2)
- return SDValue();
-
// Determine the element index into its half vector source.
int HalfElt = M % HalfNumElts;
@@ -10504,6 +10494,33 @@ static SDValue lowerVectorShuffleWithUnd
}
assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
+ // Only shuffle the halves of the inputs when useful.
+ int NumLowerHalves =
+ (HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2);
+ int NumUpperHalves =
+ (HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3);
+
+ // uuuuXXXX - don't extract uppers just to insert again.
+ if (UndefLower && NumUpperHalves != 0)
+ return SDValue();
+
+ // XXXXuuuu - don't extract both uppers, instead shuffle and then extract.
+ if (UndefUpper && NumUpperHalves == 2)
+ return SDValue();
+
+ // AVX2 - XXXXuuuu - always extract lowers.
+ if (Subtarget->hasAVX2() && !(UndefUpper && NumUpperHalves == 0)) {
+ // AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
+ if (VT == MVT::v4f64 || VT == MVT::v4i64)
+ return SDValue();
+ // AVX2 supports variable 32-bit element cross-lane shuffles.
+ if (VT == MVT::v8f32 || VT == MVT::v8i32) {
+ // XXXXuuuu - don't extract lowers and uppers.
+ if (UndefUpper && NumLowerHalves != 0 && NumUpperHalves != 0)
+ return SDValue();
+ }
+ }
+
auto GetHalfVector = [&](int HalfIdx) {
if (HalfIdx < 0)
return DAG.getUNDEF(HalfVT);
Modified: llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll?rev=258000&r1=257999&r2=258000&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll Sat Jan 16 16:30:20 2016
@@ -3,7 +3,9 @@
define void @endless_loop() {
; CHECK-LABEL: endless_loop:
; CHECK-NEXT: # BB#0:
-; CHECK-NEXT: vbroadcastss (%eax), %ymm0
+; CHECK-NEXT: vmovaps (%eax), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle-x86_32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle-x86_32.ll?rev=258000&r1=257999&r2=258000&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-shuffle-x86_32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-shuffle-x86_32.ll Sat Jan 16 16:30:20 2016
@@ -6,7 +6,7 @@ define <4 x i64> @test1(<4 x i64> %a) no
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-NEXT: retl
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
ret <4 x i64>%b
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=258000&r1=257999&r2=258000&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Sat Jan 16 16:30:20 2016
@@ -1153,7 +1153,7 @@ define <4 x i64> @shuffle_v4i64_22uu(<4
; AVX1-LABEL: shuffle_v4i64_22uu:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_22uu:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll?rev=258000&r1=257999&r2=258000&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll Sat Jan 16 16:30:20 2016
@@ -897,17 +897,11 @@ define <8 x float> @shuffle_v8f32_1111uu
}
define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
-; AVX1-LABEL: shuffle_v8f32_5555uuuu:
-; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v8f32_5555uuuu:
-; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
-; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_5555uuuu:
+; ALL: # BB#0:
+; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x float> %shuffle
}
@@ -2032,13 +2026,13 @@ define <8 x i32> @shuffle_v8i32_5555uuuu
; AVX1-LABEL: shuffle_v8i32_5555uuuu:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_5555uuuu:
; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
-; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i32> %shuffle
More information about the llvm-commits
mailing list