[llvm] r336762 - [X86] Remove some composite MOVSS/MOVSD isel patterns.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 10 21:51:40 PDT 2018
Author: ctopper
Date: Tue Jul 10 21:51:40 2018
New Revision: 336762
URL: http://llvm.org/viewvc/llvm-project?rev=336762&view=rev
Log:
[X86] Remove some composite MOVSS/MOVSD isel patterns.
These patterns matched either a MOVSS/SD whose scalar operand came from a scalar_to_vector, or a scalar_to_vector whose operand was a load.
In both cases we emitted a MOVSS/SD for the MOVSS/SD part, a COPY_TO_REGCLASS for the scalar_to_vector, and a MOVSS/SD for the load.
But we already have patterns that handle each of those three things individually, so there's no reason to build large composite patterns.
Most of the test changes are just reorderings. The one test with a meaningful change is pr30430.ll, and it appears to be a regression. But it's compiled at -O0, so I think it missed a lot of opportunities and was just getting lucky before.
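To illustrate the redundancy (this sketch is not part of the commit; both patterns are copied from the X86InstrSSE.td hunk below, UseSSE1 case), the composite pattern duplicated what the remaining individual patterns already select piecewise:

  // Composite pattern (removed by this commit): matched the shuffle and
  // the scalar_to_vector insertion in one pattern, emitting both a
  // MOVSSrr and a COPY_TO_REGCLASS from a single match.
  def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
            (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS FR32:$src2, VR128))>;

  // Individual pattern (kept): matches just the shuffle. The
  // scalar_to_vector operand is covered separately by the existing
  // FR32 -> VR128 COPY_TO_REGCLASS patterns, so isel still produces the
  // same MOVSSrr + COPY_TO_REGCLASS sequence without the composite form.
  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
            (MOVSSrr VR128:$src1, VR128:$src2)>;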
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/build-vector-512.ll
llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
llvm/trunk/test/CodeGen/X86/gather-addresses.ll
llvm/trunk/test/CodeGen/X86/half.ll
llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll
llvm/trunk/test/CodeGen/X86/mmx-build-vector.ll
llvm/trunk/test/CodeGen/X86/pr30430.ll
llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll
llvm/trunk/test/CodeGen/X86/var-permute-128.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Jul 10 21:51:40 2018
@@ -4344,8 +4344,6 @@ let Predicates = [HasAVX512] in {
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
- def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
def : Pat<(v4f32 (X86vzload addr:$src)),
@@ -4355,8 +4353,6 @@ let Predicates = [HasAVX512] in {
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
- def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
@@ -4426,16 +4422,9 @@ let Predicates = [HasAVX512] in {
def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
(VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;
- def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
- (VMOVSSZrr VR128X:$src1,
- (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
-
// Shuffle with VMOVSD
def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
-
- def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
- (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
}
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Jul 10 21:51:40 2018
@@ -252,8 +252,6 @@ let Predicates = [UseAVX] in {
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
- def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzload addr:$src)),
@@ -263,8 +261,6 @@ let Predicates = [UseAVX] in {
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
- def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
@@ -294,15 +290,9 @@ let Predicates = [UseAVX] in {
def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
(VMOVSSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
- (VMOVSSrr VR128:$src1, (COPY_TO_REGCLASS FR32:$src2, VR128))>;
-
// Shuffle with VMOVSD
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
(VMOVSDrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
- (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS FR64:$src2, VR128))>;
}
let Predicates = [UseSSE1] in {
@@ -318,8 +308,6 @@ let Predicates = [UseSSE1] in {
// MOVSSrm already zeros the high parts of the register.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
- def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzload addr:$src)),
@@ -333,17 +321,12 @@ let Predicates = [UseSSE1] in {
// Shuffle with MOVSS
def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
(MOVSSrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
- (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS FR32:$src2, VR128))>;
}
let Predicates = [UseSSE2] in {
// MOVSDrm already zeros the high parts of the register.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
- def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
@@ -354,9 +337,6 @@ let Predicates = [UseSSE2] in {
// Shuffle with MOVSD
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
(MOVSDrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
- (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS FR64:$src2, VR128))>;
}
// Aliases to help the assembler pick two byte VEX encodings by swapping the
Modified: llvm/trunk/test/CodeGen/X86/build-vector-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/build-vector-512.ll?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/build-vector-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/build-vector-512.ll Tue Jul 10 21:51:40 2018
@@ -39,6 +39,8 @@ define <16 x float> @test_buildvector_v1
;
; AVX-64-LABEL: test_buildvector_v16f32:
; AVX-64: # %bb.0:
+; AVX-64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
+; AVX-64-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
; AVX-64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
@@ -46,12 +48,10 @@ define <16 x float> @test_buildvector_v1
; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
+; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm9[0],mem[0],xmm9[2,3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
-; AVX-64-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
+; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm8[0],mem[0],xmm8[2,3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Modified: llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll Tue Jul 10 21:51:40 2018
@@ -107,11 +107,11 @@ define <4 x float> @test_buildvector_v4f
; SSE2: # %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4f32_load:
@@ -135,9 +135,9 @@ define <4 x float> @test_buildvector_v4f
define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, float %f2, float* %p3) {
; SSE2-LABEL: test_buildvector_v4f32_partial_load:
; SSE2: # %bb.0:
+; SSE2-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/gather-addresses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/gather-addresses.ll?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/gather-addresses.ll (original)
+++ llvm/trunk/test/CodeGen/X86/gather-addresses.ll Tue Jul 10 21:51:40 2018
@@ -26,8 +26,8 @@ define <4 x double> @foo(double* %p, <4
; LIN-SSE2-NEXT: movslq %edx, %rdx
; LIN-SSE2-NEXT: movslq %esi, %rsi
; LIN-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; LIN-SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; LIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; LIN-SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; LIN-SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; LIN-SSE2-NEXT: retq
;
@@ -42,10 +42,10 @@ define <4 x double> @foo(double* %p, <4
; LIN-SSE4-NEXT: cltq
; LIN-SSE4-NEXT: movslq %ecx, %rcx
; LIN-SSE4-NEXT: movslq %edx, %rdx
+; LIN-SSE4-NEXT: movslq %esi, %rsi
; LIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; LIN-SSE4-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; LIN-SSE4-NEXT: movslq %esi, %rax
; LIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; LIN-SSE4-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; LIN-SSE4-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; LIN-SSE4-NEXT: retq
;
@@ -60,13 +60,13 @@ define <4 x double> @foo(double* %p, <4
; WIN-SSE2-NEXT: movd %xmm1, %r10d
; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; WIN-SSE2-NEXT: movd %xmm0, %edx
-; WIN-SSE2-NEXT: movslq %r8d, %rax
+; WIN-SSE2-NEXT: movslq %r8d, %r11
; WIN-SSE2-NEXT: movslq %r9d, %r8
-; WIN-SSE2-NEXT: movslq %r10d, %r9
+; WIN-SSE2-NEXT: movslq %r10d, %rax
; WIN-SSE2-NEXT: movslq %edx, %rdx
; WIN-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; WIN-SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; WIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; WIN-SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; WIN-SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; WIN-SSE2-NEXT: retq
;
@@ -79,12 +79,12 @@ define <4 x double> @foo(double* %p, <4
; WIN-SSE4-NEXT: pextrd $2, %xmm0, %r8d
; WIN-SSE4-NEXT: pextrd $3, %xmm0, %r9d
; WIN-SSE4-NEXT: cltq
-; WIN-SSE4-NEXT: movslq %edx, %rdx
-; WIN-SSE4-NEXT: movslq %r8d, %r8
+; WIN-SSE4-NEXT: movslq %edx, %r10
+; WIN-SSE4-NEXT: movslq %r8d, %rdx
+; WIN-SSE4-NEXT: movslq %r9d, %r8
; WIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; WIN-SSE4-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; WIN-SSE4-NEXT: movslq %r9d, %rax
; WIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; WIN-SSE4-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; WIN-SSE4-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; WIN-SSE4-NEXT: retq
;
@@ -97,13 +97,13 @@ define <4 x double> @foo(double* %p, <4
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; LIN32-NEXT: movdqa (%edx), %xmm0
; LIN32-NEXT: pand (%ecx), %xmm0
-; LIN32-NEXT: pextrd $1, %xmm0, %ecx
-; LIN32-NEXT: pextrd $2, %xmm0, %edx
-; LIN32-NEXT: pextrd $3, %xmm0, %esi
-; LIN32-NEXT: movd %xmm0, %edi
+; LIN32-NEXT: movd %xmm0, %ecx
+; LIN32-NEXT: pextrd $1, %xmm0, %edx
+; LIN32-NEXT: pextrd $2, %xmm0, %esi
+; LIN32-NEXT: pextrd $3, %xmm0, %edi
; LIN32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; LIN32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; LIN32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; LIN32-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; LIN32-NEXT: popl %esi
; LIN32-NEXT: popl %edi
Modified: llvm/trunk/test/CodeGen/X86/half.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/half.ll?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/half.ll (original)
+++ llvm/trunk/test/CodeGen/X86/half.ll Tue Jul 10 21:51:40 2018
@@ -431,33 +431,33 @@ define <4 x float> @test_extend32_vec4(<
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $56, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-I686-NEXT: movzwl 2(%esi), %eax
-; CHECK-I686-NEXT: movl %eax, (%esp)
-; CHECK-I686-NEXT: calll __gnu_h2f_ieee
-; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
; CHECK-I686-NEXT: movzwl 4(%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
-; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
-; CHECK-I686-NEXT: movzwl 6(%esi), %eax
+; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT: movzwl 2(%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
+; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT: movzwl (%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
+; CHECK-I686-NEXT: calll __gnu_h2f_ieee
+; CHECK-I686-NEXT: movzwl 6(%esi), %eax
+; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
-; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
+; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
-; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
+; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-I686-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-I686-NEXT: addl $56, %esp
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: retl
@@ -474,24 +474,24 @@ define <4 x double> @test_extend64_vec4(
; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
; CHECK-LIBCALL-NEXT: movzwl 4(%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
-; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT: movzwl 6(%rbx), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
-; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT: movzwl (%rbx), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
-; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT: movzwl 2(%rbx), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm1
-; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
+; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
+; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm2
-; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
+; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm1
; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
@@ -530,24 +530,24 @@ define <4 x double> @test_extend64_vec4(
; CHECK-I686-NEXT: movzwl 6(%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
-; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
+; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT: movzwl 4(%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
-; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
+; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT: movzwl 2(%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
-; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
+; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT: movzwl (%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
-; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
+; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
-; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
+; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
-; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
+; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
@@ -654,24 +654,24 @@ define void @test_trunc32_vec4(<4 x floa
; CHECK-I686-NEXT: pushl %edi
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $44, %esp
-; CHECK-I686-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) # 16-byte Spill
+; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-I686-NEXT: movaps %xmm0, %xmm1
; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; CHECK-I686-NEXT: movss %xmm1, (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movw %ax, %si
-; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movw %ax, %di
-; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movw %ax, %bx
-; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movw %ax, (%ebp)
@@ -699,7 +699,7 @@ define void @test_trunc64_vec4(<4 x doub
; BWON-NOF16C-NEXT: subq $40, %rsp
; BWON-NOF16C-NEXT: movq %rdi, %rbx
; BWON-NOF16C-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
-; BWON-NOF16C-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; BWON-NOF16C-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWON-NOF16C-NEXT: callq __truncdfhf2
; BWON-NOF16C-NEXT: movl %eax, %r14d
@@ -707,7 +707,7 @@ define void @test_trunc64_vec4(<4 x doub
; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWON-NOF16C-NEXT: callq __truncdfhf2
; BWON-NOF16C-NEXT: movl %eax, %r15d
-; BWON-NOF16C-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; BWON-NOF16C-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT: callq __truncdfhf2
; BWON-NOF16C-NEXT: movl %eax, %ebp
; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
@@ -732,7 +732,7 @@ define void @test_trunc64_vec4(<4 x doub
; BWOFF-NEXT: subq $40, %rsp
; BWOFF-NEXT: movq %rdi, %rbx
; BWOFF-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
-; BWOFF-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; BWOFF-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWOFF-NEXT: callq __truncdfhf2
; BWOFF-NEXT: movw %ax, %r14w
@@ -740,7 +740,7 @@ define void @test_trunc64_vec4(<4 x doub
; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWOFF-NEXT: callq __truncdfhf2
; BWOFF-NEXT: movw %ax, %r15w
-; BWOFF-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; BWOFF-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; BWOFF-NEXT: callq __truncdfhf2
; BWOFF-NEXT: movw %ax, %bp
; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
@@ -764,24 +764,24 @@ define void @test_trunc64_vec4(<4 x doub
; BWON-F16C-NEXT: pushq %rbx
; BWON-F16C-NEXT: subq $88, %rsp
; BWON-F16C-NEXT: movq %rdi, %rbx
-; BWON-F16C-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
+; BWON-F16C-NEXT: vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; BWON-F16C-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; BWON-F16C-NEXT: vzeroupper
; BWON-F16C-NEXT: callq __truncdfhf2
; BWON-F16C-NEXT: movl %eax, %r14d
-; BWON-F16C-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; BWON-F16C-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; BWON-F16C-NEXT: vextractf128 $1, %ymm0, %xmm0
-; BWON-F16C-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; BWON-F16C-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; BWON-F16C-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; BWON-F16C-NEXT: vzeroupper
; BWON-F16C-NEXT: callq __truncdfhf2
; BWON-F16C-NEXT: movl %eax, %r15d
-; BWON-F16C-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; BWON-F16C-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; BWON-F16C-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; BWON-F16C-NEXT: vzeroupper
; BWON-F16C-NEXT: callq __truncdfhf2
; BWON-F16C-NEXT: movl %eax, %ebp
-; BWON-F16C-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; BWON-F16C-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; BWON-F16C-NEXT: callq __truncdfhf2
; BWON-F16C-NEXT: movw %ax, 4(%rbx)
; BWON-F16C-NEXT: movw %bp, (%rbx)
@@ -801,21 +801,21 @@ define void @test_trunc64_vec4(<4 x doub
; CHECK-I686-NEXT: pushl %edi
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $60, %esp
-; CHECK-I686-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) # 16-byte Spill
-; CHECK-I686-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) # 16-byte Spill
+; CHECK-I686-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-I686-NEXT: movlps %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncdfhf2
; CHECK-I686-NEXT: movw %ax, %si
-; CHECK-I686-NEXT: movapd {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT: movapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: movhpd %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncdfhf2
; CHECK-I686-NEXT: movw %ax, %di
-; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: movlps %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncdfhf2
; CHECK-I686-NEXT: movw %ax, %bx
-; CHECK-I686-NEXT: movapd {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT: movapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: movhpd %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncdfhf2
; CHECK-I686-NEXT: movw %ax, 6(%ebp)
@@ -885,12 +885,12 @@ define float @test_sitofp_fadd_i32(i32 %
; CHECK-LIBCALL-NEXT: movl %edi, %ebx
; CHECK-LIBCALL-NEXT: movzwl (%rsi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
-; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT: cvtsi2ssl %ebx, %xmm0
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movzwl %ax, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
-; CHECK-LIBCALL-NEXT: addss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
+; CHECK-LIBCALL-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-LIBCALL-NEXT: addq $16, %rsp
; CHECK-LIBCALL-NEXT: popq %rbx
; CHECK-LIBCALL-NEXT: retq
@@ -915,7 +915,7 @@ define float @test_sitofp_fadd_i32(i32 %
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp) # 4-byte Spill
+; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-I686-NEXT: xorps %xmm0, %xmm0
; CHECK-I686-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: movss %xmm0, (%esp)
@@ -924,7 +924,7 @@ define float @test_sitofp_fadd_i32(i32 %
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
-; CHECK-I686-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # 4-byte Reload
+; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll Tue Jul 10 21:51:40 2018
@@ -288,9 +288,9 @@ define <4 x float> @merge_4f32_f32_012u(
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X32-SSE1-NEXT: retl
;
; X32-SSE41-LABEL: merge_4f32_f32_012u:
@@ -337,9 +337,9 @@ define <4 x float> @merge_4f32_f32_019u(
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X32-SSE1-NEXT: retl
;
; X32-SSE41-LABEL: merge_4f32_f32_019u:
@@ -1198,11 +1198,11 @@ define <4 x float> @merge_4f32_f32_2345_
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE1-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X32-SSE1-NEXT: retl
;
; X32-SSE41-LABEL: merge_4f32_f32_2345_volatile:
Modified: llvm/trunk/test/CodeGen/X86/mmx-build-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-build-vector.ll?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-build-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-build-vector.ll Tue Jul 10 21:51:40 2018
@@ -651,8 +651,8 @@ define void @build_v2f32_01(x86_mmx *%p0
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: movdq2q %xmm0, %mm0
-; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: movdq2q %xmm1, %mm0
; X86-SSE-NEXT: movdq2q %xmm0, %mm1
; X86-SSE-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT: paddd %mm1, %mm1
Modified: llvm/trunk/test/CodeGen/X86/pr30430.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr30430.ll?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr30430.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr30430.ll Tue Jul 10 21:51:40 2018
@@ -10,7 +10,7 @@ define <16 x float> @makefloat(float %f1
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-64, %rsp
-; CHECK-NEXT: subq $256, %rsp # imm = 0x100
+; CHECK-NEXT: subq $320, %rsp # imm = 0x140
; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
@@ -61,50 +61,68 @@ define <16 x float> @makefloat(float %f1
; CHECK-NEXT: vmovss %xmm23, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmovaps %zmm21, %zmm0
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps %zmm20, %zmm0
+; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; CHECK-NEXT: vmovaps %zmm22, %zmm1
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovaps %zmm23, %zmm1
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
-; CHECK-NEXT: # implicit-def: $ymm2
-; CHECK-NEXT: vmovaps %xmm1, %xmm2
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2
-; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovaps %zmm17, %zmm1
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovaps %zmm16, %zmm0
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; CHECK-NEXT: vmovaps %zmm18, %zmm1
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovaps %zmm19, %zmm1
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0]
+; CHECK-NEXT: # implicit-def: $ymm1
+; CHECK-NEXT: vmovaps %xmm0, %xmm1
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm4[0],xmm5[0],xmm4[2,3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm6[0],xmm0[3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm7[0]
+; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 4-byte Reload
+; CHECK-NEXT: # xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 4-byte Reload
+; CHECK-NEXT: # xmm5 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[2,3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm4[0,1],xmm2[0],xmm4[3]
+; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; CHECK-NEXT: # implicit-def: $ymm3
-; CHECK-NEXT: vmovaps %xmm1, %xmm3
+; CHECK-NEXT: vmovaps %xmm2, %xmm3
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3
; CHECK-NEXT: # implicit-def: $zmm24
; CHECK-NEXT: vmovaps %zmm3, %zmm24
-; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm24, %zmm24
; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
-; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm14, (%rsp) # 4-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
Modified: llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll Tue Jul 10 21:51:40 2018
@@ -65,11 +65,11 @@ define float @int1(float %a, float %b) n
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: andps {{\.LCPI.*}}, %xmm0
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: andps {{\.LCPI.*}}, %xmm1
-; X32-NEXT: orps %xmm0, %xmm1
-; X32-NEXT: movss %xmm1, (%esp)
+; X32-NEXT: andps {{\.LCPI.*}}, %xmm0
+; X32-NEXT: orps %xmm1, %xmm0
+; X32-NEXT: movss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
; X32-NEXT: retl
@@ -91,14 +91,14 @@ define double @int2(double %a, float %b,
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
-; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: addss 20(%ebp), %xmm0
-; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: andps {{\.LCPI.*}}, %xmm1
-; X32-NEXT: cvtss2sd %xmm0, %xmm0
+; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: addss 20(%ebp), %xmm1
; X32-NEXT: andps {{\.LCPI.*}}, %xmm0
-; X32-NEXT: orps %xmm1, %xmm0
-; X32-NEXT: movlps %xmm0, (%esp)
+; X32-NEXT: cvtss2sd %xmm1, %xmm1
+; X32-NEXT: andps {{\.LCPI.*}}, %xmm1
+; X32-NEXT: orps %xmm0, %xmm1
+; X32-NEXT: movlps %xmm1, (%esp)
; X32-NEXT: fldl (%esp)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
Modified: llvm/trunk/test/CodeGen/X86/var-permute-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/var-permute-128.ll?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/var-permute-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/var-permute-128.ll Tue Jul 10 21:51:40 2018
@@ -436,16 +436,16 @@ define <4 x float> @var_shuffle_v4f32(<4
; SSE3-NEXT: movd %xmm1, %esi
; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE3-NEXT: andl $3, %eax
-; SSE3-NEXT: andl $3, %ecx
-; SSE3-NEXT: andl $3, %edx
-; SSE3-NEXT: andl $3, %esi
; SSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE3-NEXT: andl $3, %ecx
; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE3-NEXT: andl $3, %edx
; SSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE3-NEXT: andl $3, %esi
+; SSE3-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v4f32:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll Tue Jul 10 21:51:40 2018
@@ -73,17 +73,17 @@ define <4 x float> @var_shuffle_v4f32_v4
; SSE2-NEXT: # kill: def $esi killed $esi def $rsi
; SSE2-NEXT: # kill: def $edi killed $edi def $rdi
; SSE2-NEXT: andl $3, %edi
-; SSE2-NEXT: andl $3, %esi
-; SSE2-NEXT: andl $3, %edx
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: andl $3, %ecx
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: andl $3, %esi
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: andl $3, %edx
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: andl $3, %ecx
+; SSE2-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
@@ -93,17 +93,17 @@ define <4 x float> @var_shuffle_v4f32_v4
; SSSE3-NEXT: # kill: def $esi killed $esi def $rsi
; SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
; SSSE3-NEXT: andl $3, %edi
-; SSSE3-NEXT: andl $3, %esi
-; SSSE3-NEXT: andl $3, %edx
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSSE3-NEXT: andl $3, %ecx
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT: andl $3, %esi
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: andl $3, %edx
; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: andl $3, %ecx
+; SSSE3-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
@@ -113,11 +113,11 @@ define <4 x float> @var_shuffle_v4f32_v4
; SSE41-NEXT: # kill: def $esi killed $esi def $rsi
; SSE41-NEXT: # kill: def $edi killed $edi def $rdi
; SSE41-NEXT: andl $3, %edi
+; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: andl $3, %esi
; SSE41-NEXT: andl $3, %edx
-; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE41-NEXT: andl $3, %ecx
-; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
@@ -130,11 +130,11 @@ define <4 x float> @var_shuffle_v4f32_v4
; AVX-NEXT: # kill: def $esi killed $esi def $rsi
; AVX-NEXT: # kill: def $edi killed $edi def $rdi
; AVX-NEXT: andl $3, %edi
+; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: andl $3, %esi
; AVX-NEXT: andl $3, %edx
-; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: andl $3, %ecx
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
@@ -1222,10 +1222,10 @@ define <4 x float> @var_shuffle_v4f32_v4
; SSE-NEXT: andl $3, %edi
; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: andl $3, %edx
+; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: andl $3, %ecx
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -1239,10 +1239,10 @@ define <4 x float> @var_shuffle_v4f32_v4
; AVX-NEXT: andl $3, %edi
; AVX-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
; AVX-NEXT: andl $3, %edx
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: andl $3, %ecx
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll?rev=336762&r1=336761&r2=336762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll Tue Jul 10 21:51:40 2018
@@ -14,15 +14,15 @@ define <4 x double> @var_shuffle_v4f64_v
; ALL-NEXT: andq $-32, %rsp
; ALL-NEXT: subq $64, %rsp
; ALL-NEXT: andl $3, %esi
-; ALL-NEXT: andl $3, %edi
; ALL-NEXT: andl $3, %ecx
; ALL-NEXT: andl $3, %edx
+; ALL-NEXT: andl $3, %edi
; ALL-NEXT: vmovaps %ymm0, (%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: retq
@@ -68,15 +68,15 @@ define <4 x double> @var_shuffle_v4f64_v
; ALL-LABEL: var_shuffle_v4f64_v2f64_xxxx_i64:
; ALL: # %bb.0:
; ALL-NEXT: andl $1, %esi
-; ALL-NEXT: andl $1, %edi
; ALL-NEXT: andl $1, %ecx
; ALL-NEXT: andl $1, %edx
+; ALL-NEXT: andl $1, %edi
; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
%x0 = extractelement <2 x double> %x, i64 %i0
%x1 = extractelement <2 x double> %x, i64 %i1
@@ -194,21 +194,21 @@ define <8 x float> @var_shuffle_v8f32_v8
; ALL-NEXT: movl 16(%rbp), %eax
; ALL-NEXT: andl $7, %eax
; ALL-NEXT: andl $7, %edi
+; ALL-NEXT: vmovaps %ymm0, (%rsp)
+; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: andl $7, %esi
; ALL-NEXT: andl $7, %edx
; ALL-NEXT: andl $7, %ecx
; ALL-NEXT: andl $7, %r8d
-; ALL-NEXT: vmovaps %ymm0, (%rsp)
-; ALL-NEXT: andl $7, %r9d
-; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
-; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; ALL-NEXT: andl $7, %r9d
; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: retq
@@ -245,21 +245,21 @@ define <8 x float> @var_shuffle_v8f32_v4
; ALL-NEXT: movl {{[0-9]+}}(%rsp), %eax
; ALL-NEXT: andl $3, %eax
; ALL-NEXT: andl $3, %edi
+; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: andl $3, %esi
; ALL-NEXT: andl $3, %edx
; ALL-NEXT: andl $3, %ecx
; ALL-NEXT: andl $3, %r8d
-; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; ALL-NEXT: andl $3, %r9d
-; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
-; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; ALL-NEXT: andl $3, %r9d
; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
%x0 = extractelement <4 x float> %x, i32 %i0
%x1 = extractelement <4 x float> %x, i32 %i1