[llvm] r272003 - [X86][SSE] Improved blend+zero target shuffle combining to use combined shuffle mask directly
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 7 05:20:16 PDT 2016
Author: rksimon
Date: Tue Jun 7 07:20:14 2016
New Revision: 272003
URL: http://llvm.org/viewvc/llvm-project?rev=272003&view=rev
Log:
[X86][SSE] Improved blend+zero target shuffle combining to use combined shuffle mask directly
We currently only combine to blend+zero if the target value type has 8 or fewer elements, which misses a lot of cases where the combined mask has been widened.
This change uses the combined mask (its element count and element size) to determine the blend value type, allowing us to catch more of the widened cases.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=272003&r1=272002&r2=272003&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jun 7 07:20:14 2016
@@ -24602,23 +24602,27 @@ static bool combineX86ShuffleChain(SDVal
}
// Attempt to blend with zero.
- if (VT.getVectorNumElements() <= 8 &&
+ if (NumMaskElts <= 8 &&
((Subtarget.hasSSE41() && VT.is128BitVector()) ||
(Subtarget.hasAVX() && VT.is256BitVector()))) {
// Convert VT to a type compatible with X86ISD::BLENDI.
// TODO - add 16i16 support (requires lane duplication).
- MVT ShuffleVT = VT;
+ bool FloatDomain = VT.isFloatingPoint();
+ MVT ShuffleVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits)
+ : MVT::getIntegerVT(MaskEltSizeInBits);
+ ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts);
+
if (Subtarget.hasAVX2()) {
- if (VT == MVT::v4i64)
+ if (ShuffleVT == MVT::v4i64)
ShuffleVT = MVT::v8i32;
- else if (VT == MVT::v2i64)
+ else if (ShuffleVT == MVT::v2i64)
ShuffleVT = MVT::v4i32;
} else {
- if (VT == MVT::v2i64 || VT == MVT::v4i32)
+ if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
ShuffleVT = MVT::v8i16;
- else if (VT == MVT::v4i64)
+ else if (ShuffleVT == MVT::v4i64)
ShuffleVT = MVT::v4f64;
- else if (VT == MVT::v8i32)
+ else if (ShuffleVT == MVT::v8i32)
ShuffleVT = MVT::v8f32;
}
Modified: llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll?rev=272003&r1=272002&r2=272003&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll Tue Jun 7 07:20:14 2016
@@ -347,16 +347,11 @@ define <4 x i32> @_clearupper4xi32b(<4 x
; SSE-NEXT: pinsrw $7, %eax, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: _clearupper4xi32b:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: _clearupper4xi32b:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[4,5],zero,zero,xmm0[8,9],zero,zero,xmm0[12,13],zero,zero
-; AVX2-NEXT: retq
+; AVX-LABEL: _clearupper4xi32b:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; AVX-NEXT: retq
%x16 = bitcast <4 x i32> %0 to <8 x i16>
%r0 = insertelement <8 x i16> %x16, i16 zeroinitializer, i32 1
%r1 = insertelement <8 x i16> %r0, i16 zeroinitializer, i32 3
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=272003&r1=272002&r2=272003&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Tue Jun 7 07:20:14 2016
@@ -107,7 +107,8 @@ define <16 x i8> @combine_vpperm_identit
define <16 x i8> @combine_vpperm_as_blend_with_zero(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: combine_vpperm_as_blend_with_zero:
; CHECK: # BB#0:
-; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6,7]
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 0, i8 1, i8 128, i8 129, i8 4, i8 5, i8 6, i8 7, i8 130, i8 131, i8 132, i8 133, i8 134, i8 135, i8 136, i8 137>)
ret <16 x i8> %res0
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll?rev=272003&r1=272002&r2=272003&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll Tue Jun 7 07:20:14 2016
@@ -1182,26 +1182,47 @@ define <8 x i16> @var_shuffle_v8i16_v8i1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
-; AVX-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
-; AVX: # BB#0:
-; AVX-NEXT: movswq %di, %r10
-; AVX-NEXT: movswq %si, %r11
-; AVX-NEXT: movswq %dx, %rdx
-; AVX-NEXT: movswq %cx, %rcx
-; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: movswq %r8w, %rdi
-; AVX-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: movswq %r9w, %rax
-; AVX-NEXT: movzwl -40(%rsp,%r10,2), %esi
-; AVX-NEXT: vmovd %esi, %xmm0
-; AVX-NEXT: vpinsrw $1, -24(%rsp,%r11,2), %xmm0, %xmm0
-; AVX-NEXT: vpinsrw $2, -40(%rsp,%rdx,2), %xmm0, %xmm0
-; AVX-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
-; AVX-NEXT: vpinsrw $4, -40(%rsp,%rdi,2), %xmm0, %xmm0
-; AVX-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
-; AVX-NEXT: retq
+; AVX1-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: movswq %di, %r10
+; AVX1-NEXT: movswq %si, %r11
+; AVX1-NEXT: movswq %dx, %rdx
+; AVX1-NEXT: movswq %cx, %rcx
+; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movswq %r8w, %rdi
+; AVX1-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movswq %r9w, %rax
+; AVX1-NEXT: movzwl -40(%rsp,%r10,2), %esi
+; AVX1-NEXT: vmovd %esi, %xmm0
+; AVX1-NEXT: vpinsrw $1, -24(%rsp,%r11,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $2, -40(%rsp,%rdx,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $4, -40(%rsp,%rdi,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: movswq %di, %r10
+; AVX2-NEXT: movswq %si, %r11
+; AVX2-NEXT: movswq %dx, %rdx
+; AVX2-NEXT: movswq %cx, %rcx
+; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movswq %r8w, %rdi
+; AVX2-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movswq %r9w, %rax
+; AVX2-NEXT: movzwl -40(%rsp,%r10,2), %esi
+; AVX2-NEXT: vmovd %esi, %xmm0
+; AVX2-NEXT: vpinsrw $1, -24(%rsp,%r11,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $2, -40(%rsp,%rdx,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $4, -40(%rsp,%rdi,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; AVX2-NEXT: retq
%x0 = extractelement <8 x i16> %x, i16 %i0
%y1 = extractelement <8 x i16> %y, i16 %i1
%x2 = extractelement <8 x i16> %x, i16 %i2
More information about the llvm-commits mailing list