[llvm] r296966 - [X86][SSE] Enable post-legalize vXi64 shuffle combining on 32-bit targets
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 4 04:50:47 PST 2017
Author: rksimon
Date: Sat Mar 4 06:50:47 2017
New Revision: 296966
URL: http://llvm.org/viewvc/llvm-project?rev=296966&view=rev
Log:
[X86][SSE] Enable post-legalize vXi64 shuffle combining on 32-bit targets
Long ago (2010 according to svn blame), combineShuffle probably needed to prevent the accidental creation of illegal i64 types. However, there no longer appear to be any combines that can cause this, as they all have their own legality checks.
Differential Revision: https://reviews.llvm.org/D30213
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx-shuffle-x86_32.ll
llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=296966&r1=296965&r2=296966&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Mar 4 06:50:47 2017
@@ -28380,12 +28380,7 @@ static SDValue combineShuffle(SDNode *N,
const X86Subtarget &Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
-
- // Don't create instructions with illegal types after legalize types has run.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
- return SDValue();
-
// If we have legalized the vector types, look for blends of FADD and FSUB
// nodes that we can fuse into an ADDSUB node.
if (TLI.isTypeLegal(VT))
Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle-x86_32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle-x86_32.ll?rev=296966&r1=296965&r2=296966&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-shuffle-x86_32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-shuffle-x86_32.ll Sat Mar 4 06:50:47 2017
@@ -16,8 +16,7 @@ define <8 x i16> @test2(<4 x i16>* %v) n
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: retl
%v9 = load <4 x i16>, <4 x i16> * %v, align 8
%v10 = shufflevector <4 x i16> %v9, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
Modified: llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll?rev=296966&r1=296965&r2=296966&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll Sat Mar 4 06:50:47 2017
@@ -12,7 +12,7 @@ define i64 @extract_any_extend_vector_in
; X32-SSE-NEXT: andl $-128, %esp
; X32-SSE-NEXT: subl $384, %esp # imm = 0x180
; X32-SSE-NEXT: movl 88(%ebp), %ecx
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
+; X32-SSE-NEXT: movdqa 72(%ebp), %xmm0
; X32-SSE-NEXT: xorps %xmm1, %xmm1
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
@@ -21,7 +21,7 @@ define i64 @extract_any_extend_vector_in
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; X32-SSE-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=296966&r1=296965&r2=296966&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Sat Mar 4 06:50:47 2017
@@ -499,7 +499,7 @@ define <4 x i64> @combine_pshufb_as_zext
; X32-LABEL: combine_pshufb_as_zext128:
; X32: # BB#0:
; X32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; X32-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,14],zero,zero,zero,zero,zero,zero,ymm0[13,12],zero,zero,zero,zero,zero,zero,ymm0[31,30],zero,zero,zero,zero,zero,zero,ymm0[29,28],zero,zero,zero,zero,zero,zero
; X32-NEXT: retl
;
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=296966&r1=296965&r2=296966&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Sat Mar 4 06:50:47 2017
@@ -66,10 +66,6 @@ define <8 x double> @combine_permvar_8f6
define <8 x i64> @combine_permvar_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
; X32-LABEL: combine_permvar_8i64_identity:
; X32: # BB#0:
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
-; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
-; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_permvar_8i64_identity:
@@ -189,10 +185,6 @@ define <8 x double> @combine_vpermt2var_
define <8 x i64> @combine_vpermt2var_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
; X32-LABEL: combine_vpermt2var_8i64_identity:
; X32: # BB#0:
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = <u,u,6,0,5,0,4,0,3,0,2,0,1,0,0,0>
-; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = <u,u,14,0,5,0,12,0,3,0,10,0,1,0,8,0>
-; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_8i64_identity:
@@ -648,8 +640,7 @@ define <16 x i32> @combine_permvar_as_vp
define <8 x i64> @combine_permvar_as_vpbroadcastq512(<8 x i64> %x0) {
; X32-LABEL: combine_permvar_as_vpbroadcastq512:
; X32: # BB#0:
-; X32-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; X32-NEXT: vbroadcastsd %xmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_permvar_as_vpbroadcastq512:
@@ -663,8 +654,7 @@ define <8 x i64> @combine_permvar_as_vpb
define <8 x i64> @combine_permvar_8i64_as_permq(<8 x i64> %x0, <8 x i64> %x1) {
; X32-LABEL: combine_permvar_8i64_as_permq:
; X32: # BB#0:
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <3,0,2,0,1,0,u,u,u,u,6,0,5,0,4,0>
-; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; X32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
; X32-NEXT: retl
;
; X64-LABEL: combine_permvar_8i64_as_permq:
@@ -679,8 +669,7 @@ define <8 x i64> @combine_permvar_8i64_a
; X32: # BB#0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = <3,0,2,0,1,0,u,u,u,u,6,0,5,0,4,0>
-; X32-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1}
+; X32-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
; X32-NEXT: vmovdqa64 %zmm1, %zmm0
; X32-NEXT: retl
;
@@ -872,10 +861,6 @@ define <8 x double> @combine_vpermi2var_
define <8 x i64> @combine_vpermi2var_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
; X32-LABEL: combine_vpermi2var_8i64_identity:
; X32: # BB#0:
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = <u,u,6,0,5,0,4,0,3,0,2,0,1,0,0,0>
-; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = <u,u,14,0,5,0,12,0,3,0,10,0,1,0,8,0>
-; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_8i64_identity:
@@ -973,10 +958,8 @@ define <8 x double> @combine_vpermi2var_
define <8 x i64> @combine_vpermt2var_8i64_as_vpermq(<8 x i64> %x0, <8 x i64> %x1) {
; X32-LABEL: combine_vpermt2var_8i64_as_vpermq:
; X32: # BB#0:
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0]
-; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [12,0,5,0,14,0,7,0,8,0,1,0,10,0,3,0]
-; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0
+; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
+; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_8i64_as_vpermq:
@@ -1136,9 +1119,7 @@ define <16 x float> @combine_vpermi2var_
define <8 x i64> @combine_vpermvar_insertion_as_broadcast_v8i64(i64 %a0) {
; X32-LABEL: combine_vpermvar_insertion_as_broadcast_v8i64:
; X32: # BB#0:
-; X32-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; X32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermvar_insertion_as_broadcast_v8i64:
More information about the llvm-commits
mailing list