[llvm] r364948 - [X86] Add patterns to select (scalar_to_vector (loadf32)) as (V)MOVSSrm instead of COPY_TO_REGCLASS + (V)MOVSSrm_alt.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 2 10:51:02 PDT 2019
Author: ctopper
Date: Tue Jul 2 10:51:02 2019
New Revision: 364948
URL: http://llvm.org/viewvc/llvm-project?rev=364948&view=rev
Log:
[X86] Add patterns to select (scalar_to_vector (loadf32)) as (V)MOVSSrm instead of COPY_TO_REGCLASS + (V)MOVSSrm_alt.
Similar for (V)MOVSD. Ultimately, I'd like to see about folding
scalar_to_vector+load to vzload, which would select as (V)MOVSSrm,
so this is closer to that.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll
llvm/trunk/test/CodeGen/X86/build-vector-512.ll
llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll
llvm/trunk/test/CodeGen/X86/fp128-cast.ll
llvm/trunk/test/CodeGen/X86/gather-addresses.ll
llvm/trunk/test/CodeGen/X86/half.ll
llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll
llvm/trunk/test/CodeGen/X86/masked_expandload.ll
llvm/trunk/test/CodeGen/X86/masked_load.ll
llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll
llvm/trunk/test/CodeGen/X86/mmx-build-vector.ll
llvm/trunk/test/CodeGen/X86/pr2656.ll
llvm/trunk/test/CodeGen/X86/pr30430.ll
llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll
llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll
llvm/trunk/test/CodeGen/X86/var-permute-128.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Jul 2 10:51:02 2019
@@ -4312,6 +4312,10 @@ let Predicates = [HasAVX512, OptForSpeed
}
let Predicates = [HasAVX512] in {
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (VMOVSSZrm addr:$src)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (VMOVSDZrm addr:$src)>;
// Represent the same patterns above but in the form they appear for
// 256-bit types
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Jul 2 10:51:02 2019
@@ -263,6 +263,11 @@ let canFoldAsLoad = 1, isReMaterializabl
// Patterns
let Predicates = [UseAVX] in {
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (VMOVSSrm addr:$src)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (VMOVSDrm addr:$src)>;
+
// Represent the same patterns above but in the form they appear for
// 256-bit types
def : Pat<(v8f32 (X86vzload addr:$src)),
@@ -290,17 +295,23 @@ let Predicates = [UseAVX, OptForSize] in
(v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
}
-let Predicates = [UseSSE1] in {
- let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
- // Move scalar to XMM zero-extended, zeroing a VR128 then do a
- // MOVSS to the lower bits.
- def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
- }
+let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
+// Move scalar to XMM zero-extended, zeroing a VR128 then do a
+// MOVSS to the lower bits.
+def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
+def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}
+let Predicates = [UseSSE2] in
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (MOVSDrm addr:$src)>;
+
+let Predicates = [UseSSE1] in
+def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (MOVSSrm addr:$src)>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll Tue Jul 2 10:51:02 2019
@@ -494,9 +494,9 @@ define <8 x float> @masked_gather_v8floa
; NOGATHER-NEXT: je .LBB7_10
; NOGATHER-NEXT: # %bb.9: # %cond.load10
; NOGATHER-NEXT: vmovq %xmm2, %rax
-; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm4
-; NOGATHER-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3]
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
+; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vblendps {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_10: # %else11
; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax
Modified: llvm/trunk/test/CodeGen/X86/build-vector-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/build-vector-512.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/build-vector-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/build-vector-512.ll Tue Jul 2 10:51:02 2019
@@ -39,8 +39,6 @@ define <16 x float> @test_buildvector_v1
;
; AVX-64-LABEL: test_buildvector_v16f32:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
-; AVX-64-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
; AVX-64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
@@ -48,10 +46,12 @@ define <16 x float> @test_buildvector_v1
; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm9[0],mem[0],xmm9[2,3]
+; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
-; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm8[0],mem[0],xmm8[2,3]
+; AVX-64-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Modified: llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll Tue Jul 2 10:51:02 2019
@@ -39,10 +39,10 @@ define <4 x float> @test_negative_zero_1
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
@@ -107,10 +107,10 @@ define <4 x float> @test_buildvector_v4f
; SSE2: # %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
@@ -136,8 +136,8 @@ define <4 x float> @test_buildvector_v4f
; SSE2-LABEL: test_buildvector_v4f32_partial_load:
; SSE2: # %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll (original)
+++ llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll Tue Jul 2 10:51:02 2019
@@ -33,17 +33,17 @@ define double @mag_neg0_double(double %x
ret double %y
}
-; CHECK: [[ONE3:L.+]]:
-; CHECK-NEXT: .quad 4607182418800017408 ## double 1
; CHECK: [[SIGNMASK3:L.+]]:
; CHECK-NEXT: .quad -9223372036854775808 ## double -0
; CHECK-NEXT: .quad -9223372036854775808 ## double -0
+; CHECK: [[ONE3:L.+]]:
+; CHECK-NEXT: .quad 4607182418800017408 ## double 1
define double @mag_pos1_double(double %x) nounwind {
; CHECK-LABEL: mag_pos1_double:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movsd [[ONE3]](%rip), %xmm1
; CHECK-NEXT: andps [[SIGNMASK3]](%rip), %xmm0
+; CHECK-NEXT: movsd [[ONE3]](%rip), %xmm1
; CHECK-NEXT: orps %xmm1, %xmm0
; CHECK-NEXT: retq
;
@@ -99,19 +99,19 @@ define float @mag_neg0_float(float %x) n
ret float %y
}
-; CHECK: [[ONE7:L.+]]:
-; CHECK-NEXT: .long 1065353216 ## float 1
; CHECK: [[SIGNMASK7:L.+]]:
; CHECK-NEXT: .long 2147483648 ## float -0
; CHECK-NEXT: .long 2147483648 ## float -0
; CHECK-NEXT: .long 2147483648 ## float -0
; CHECK-NEXT: .long 2147483648 ## float -0
+; CHECK: [[ONE7:L.+]]:
+; CHECK-NEXT: .long 1065353216 ## float 1
define float @mag_pos1_float(float %x) nounwind {
; CHECK-LABEL: mag_pos1_float:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movss [[ONE7]](%rip), %xmm1
; CHECK-NEXT: andps [[SIGNMASK7]](%rip), %xmm0
+; CHECK-NEXT: movss [[ONE7]](%rip), %xmm1
; CHECK-NEXT: orps %xmm1, %xmm0
; CHECK-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/fp128-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp128-cast.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp128-cast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp128-cast.ll Tue Jul 2 10:51:02 2019
@@ -844,8 +844,8 @@ define fp128 @TestTruncCopysign(fp128 %x
; X64-NEXT: # %bb.1: # %if.then
; X64-NEXT: pushq %rax
; X64-NEXT: callq __trunctfdf2
-; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: andps {{.*}}(%rip), %xmm0
+; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: callq __extenddftf2
; X64-NEXT: addq $8, %rsp
Modified: llvm/trunk/test/CodeGen/X86/gather-addresses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/gather-addresses.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/gather-addresses.ll (original)
+++ llvm/trunk/test/CodeGen/X86/gather-addresses.ll Tue Jul 2 10:51:02 2019
@@ -26,8 +26,8 @@ define <4 x double> @foo(double* %p, <4
; LIN-SSE2-NEXT: movslq %edx, %rdx
; LIN-SSE2-NEXT: movslq %esi, %rsi
; LIN-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; LIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN-SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; LIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN-SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; LIN-SSE2-NEXT: retq
;
@@ -42,10 +42,10 @@ define <4 x double> @foo(double* %p, <4
; LIN-SSE4-NEXT: cltq
; LIN-SSE4-NEXT: movslq %ecx, %rcx
; LIN-SSE4-NEXT: movslq %edx, %rdx
-; LIN-SSE4-NEXT: movslq %esi, %rsi
; LIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; LIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN-SSE4-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; LIN-SSE4-NEXT: movslq %esi, %rax
+; LIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN-SSE4-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; LIN-SSE4-NEXT: retq
;
@@ -60,13 +60,13 @@ define <4 x double> @foo(double* %p, <4
; WIN-SSE2-NEXT: movd %xmm1, %r10d
; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; WIN-SSE2-NEXT: movd %xmm0, %edx
-; WIN-SSE2-NEXT: movslq %r8d, %r11
+; WIN-SSE2-NEXT: movslq %r8d, %rax
; WIN-SSE2-NEXT: movslq %r9d, %r8
-; WIN-SSE2-NEXT: movslq %r10d, %rax
+; WIN-SSE2-NEXT: movslq %r10d, %r9
; WIN-SSE2-NEXT: movslq %edx, %rdx
; WIN-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; WIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; WIN-SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; WIN-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; WIN-SSE2-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; WIN-SSE2-NEXT: retq
;
@@ -79,12 +79,12 @@ define <4 x double> @foo(double* %p, <4
; WIN-SSE4-NEXT: pextrd $2, %xmm0, %r8d
; WIN-SSE4-NEXT: pextrd $3, %xmm0, %r9d
; WIN-SSE4-NEXT: cltq
-; WIN-SSE4-NEXT: movslq %edx, %r10
-; WIN-SSE4-NEXT: movslq %r8d, %rdx
-; WIN-SSE4-NEXT: movslq %r9d, %r8
+; WIN-SSE4-NEXT: movslq %edx, %rdx
+; WIN-SSE4-NEXT: movslq %r8d, %r8
; WIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; WIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; WIN-SSE4-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; WIN-SSE4-NEXT: movslq %r9d, %rax
+; WIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; WIN-SSE4-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; WIN-SSE4-NEXT: retq
;
@@ -97,13 +97,13 @@ define <4 x double> @foo(double* %p, <4
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; LIN32-NEXT: movdqa (%edx), %xmm0
; LIN32-NEXT: pand (%ecx), %xmm0
-; LIN32-NEXT: movd %xmm0, %ecx
-; LIN32-NEXT: pextrd $1, %xmm0, %edx
-; LIN32-NEXT: pextrd $2, %xmm0, %esi
-; LIN32-NEXT: pextrd $3, %xmm0, %edi
+; LIN32-NEXT: pextrd $1, %xmm0, %ecx
+; LIN32-NEXT: pextrd $2, %xmm0, %edx
+; LIN32-NEXT: pextrd $3, %xmm0, %esi
+; LIN32-NEXT: movd %xmm0, %edi
; LIN32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN32-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; LIN32-NEXT: popl %esi
; LIN32-NEXT: popl %edi
Modified: llvm/trunk/test/CodeGen/X86/half.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/half.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/half.ll (original)
+++ llvm/trunk/test/CodeGen/X86/half.ll Tue Jul 2 10:51:02 2019
@@ -431,18 +431,18 @@ define <4 x float> @test_extend32_vec4(<
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $56, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-I686-NEXT: movzwl 4(%esi), %eax
+; CHECK-I686-NEXT: movzwl 2(%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
-; CHECK-I686-NEXT: movzwl 2(%esi), %eax
+; CHECK-I686-NEXT: movzwl 4(%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
-; CHECK-I686-NEXT: movzwl (%esi), %eax
+; CHECK-I686-NEXT: movzwl 6(%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
-; CHECK-I686-NEXT: movzwl 6(%esi), %eax
+; CHECK-I686-NEXT: movzwl (%esi), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
@@ -453,10 +453,10 @@ define <4 x float> @test_extend32_vec4(<
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-I686-NEXT: addl $56, %esp
; CHECK-I686-NEXT: popl %esi
Modified: llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll Tue Jul 2 10:51:02 2019
@@ -168,8 +168,8 @@ define <2 x i64> @elt0_v2i64(i64 %x) {
define <4 x float> @elt1_v4f32(float %x) {
; X32SSE2-LABEL: elt1_v4f32:
; X32SSE2: # %bb.0:
-; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
+; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X32SSE2-NEXT: retl
@@ -305,8 +305,8 @@ define <8 x i32> @elt7_v8i32(i32 %x) {
define <8 x float> @elt6_v8f32(float %x) {
; X32SSE2-LABEL: elt6_v8f32:
; X32SSE2: # %bb.0:
-; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
+; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
Modified: llvm/trunk/test/CodeGen/X86/masked_expandload.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_expandload.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_expandload.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_expandload.ll Tue Jul 2 10:51:02 2019
@@ -1473,8 +1473,8 @@ define <16 x float> @expandload_v16f32_c
; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm1[0],mem[0],xmm1[2,3]
; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; AVX1OR2-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm1[4,5,6,7]
-; AVX1OR2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX1OR2-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1OR2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX1OR2-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
Modified: llvm/trunk/test/CodeGen/X86/masked_load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_load.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_load.ll Tue Jul 2 10:51:02 2019
@@ -6606,12 +6606,12 @@ define <4 x float> @mload_constmask_v4f3
; SSE2: ## %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE2-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
-; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[2,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm1[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,0]
; SSE2-NEXT: retq
;
; SSE42-LABEL: mload_constmask_v4f32:
@@ -7069,8 +7069,8 @@ define <8 x double> @mload_constmask_v8f
define <4 x double> @mload_constmask_v4f64_undef_passthrough(<4 x double>* %addr) {
; SSE-LABEL: mload_constmask_v4f64_undef_passthrough:
; SSE: ## %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movups (%rdi), %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: mload_constmask_v4f64_undef_passthrough:
Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll Tue Jul 2 10:51:02 2019
@@ -286,10 +286,10 @@ define <4 x float> @merge_4f32_f32_012u(
; X32-SSE1-LABEL: merge_4f32_f32_012u:
; X32-SSE1: # %bb.0:
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE1-NEXT: retl
;
@@ -335,10 +335,10 @@ define <4 x float> @merge_4f32_f32_019u(
; X32-SSE1-LABEL: merge_4f32_f32_019u:
; X32-SSE1: # %bb.0:
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE1-NEXT: retl
;
@@ -1197,10 +1197,10 @@ define <4 x float> @merge_4f32_f32_2345_
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE1-NEXT: retl
;
Modified: llvm/trunk/test/CodeGen/X86/mmx-build-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-build-vector.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-build-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-build-vector.ll Tue Jul 2 10:51:02 2019
@@ -651,8 +651,8 @@ define void @build_v2f32_01(x86_mmx *%p0
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: movdq2q %xmm1, %mm0
+; X86-SSE-NEXT: movdq2q %xmm0, %mm0
+; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movdq2q %xmm0, %mm1
; X86-SSE-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT: paddd %mm1, %mm1
Modified: llvm/trunk/test/CodeGen/X86/pr2656.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr2656.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr2656.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr2656.ll Tue Jul 2 10:51:02 2019
@@ -17,15 +17,16 @@ define void @foo(%struct.anon* byval %p)
; CHECK-LABEL: foo:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: subl $28, %esp
-; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: movaps {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; CHECK-NEXT: xorps %xmm2, %xmm0
-; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
-; CHECK-NEXT: xorps %xmm2, %xmm1
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: xorps %xmm0, %xmm1
; CHECK-NEXT: cvtss2sd %xmm1, %xmm1
-; CHECK-NEXT: movsd %xmm1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: xorps %xmm0, %xmm2
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm2, %xmm0
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movsd %xmm1, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $_.str, (%esp)
; CHECK-NEXT: calll _printf
; CHECK-NEXT: addl $28, %esp
Modified: llvm/trunk/test/CodeGen/X86/pr30430.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr30430.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr30430.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr30430.ll Tue Jul 2 10:51:02 2019
@@ -10,7 +10,7 @@ define <16 x float> @makefloat(float %f1
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-64, %rsp
-; CHECK-NEXT: subq $320, %rsp # imm = 0x140
+; CHECK-NEXT: subq $256, %rsp # imm = 0x100
; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
@@ -61,59 +61,41 @@ define <16 x float> @makefloat(float %f1
; CHECK-NEXT: vmovss %xmm23, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovaps %zmm21, %zmm0
-; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vmovaps %zmm20, %zmm0
-; CHECK-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; CHECK-NEXT: vmovaps %zmm22, %zmm1
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
-; CHECK-NEXT: vmovaps %zmm23, %zmm1
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
-; CHECK-NEXT: vmovaps %zmm17, %zmm1
-; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vmovaps %zmm16, %zmm0
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; CHECK-NEXT: vmovaps %zmm18, %zmm1
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; CHECK-NEXT: # implicit-def: $ymm2
+; CHECK-NEXT: vmovaps %xmm1, %xmm2
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
-; CHECK-NEXT: vmovaps %zmm19, %zmm1
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
-; CHECK-NEXT: # implicit-def: $ymm1
-; CHECK-NEXT: vmovaps %xmm0, %xmm1
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm4[0],xmm5[0],xmm4[2,3]
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm6[0],xmm0[3]
-; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm7[0]
-; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 4-byte Reload
-; CHECK-NEXT: # xmm4 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 4-byte Reload
-; CHECK-NEXT: # xmm5 = mem[0],zero,zero,zero
-; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[2,3]
-; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm4[0,1],xmm2[0],xmm4[3]
-; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0]
; CHECK-NEXT: # implicit-def: $ymm3
-; CHECK-NEXT: vmovaps %xmm2, %xmm3
+; CHECK-NEXT: vmovaps %xmm1, %xmm3
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3
; CHECK-NEXT: # implicit-def: $zmm24
; CHECK-NEXT: vmovaps %zmm3, %zmm24
-; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm24, %zmm24
+; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24
; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
; CHECK-NEXT: movq %rbp, %rsp
Modified: llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll Tue Jul 2 10:51:02 2019
@@ -76,9 +76,9 @@ define float @fcmp_select_fp_constants(f
;
; X64_AVX2-LABEL: fcmp_select_fp_constants:
; X64_AVX2: # %bb.0:
+; X64_AVX2-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64_AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64_AVX2-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; X64_AVX2-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll Tue Jul 2 10:51:02 2019
@@ -65,11 +65,11 @@ define float @int1(float %a, float %b) n
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: andps {{\.LCPI.*}}, %xmm0
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: andps {{\.LCPI.*}}, %xmm1
-; X32-NEXT: andps {{\.LCPI.*}}, %xmm0
-; X32-NEXT: orps %xmm1, %xmm0
-; X32-NEXT: movss %xmm0, (%esp)
+; X32-NEXT: orps %xmm0, %xmm1
+; X32-NEXT: movss %xmm1, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
; X32-NEXT: retl
@@ -91,14 +91,14 @@ define double @int2(double %a, float %b,
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
-; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: addss 20(%ebp), %xmm1
-; X32-NEXT: andps {{\.LCPI.*}}, %xmm0
-; X32-NEXT: cvtss2sd %xmm1, %xmm1
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: addss 20(%ebp), %xmm0
+; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: andps {{\.LCPI.*}}, %xmm1
-; X32-NEXT: orps %xmm0, %xmm1
-; X32-NEXT: movlps %xmm1, (%esp)
+; X32-NEXT: cvtss2sd %xmm0, %xmm0
+; X32-NEXT: andps {{\.LCPI.*}}, %xmm0
+; X32-NEXT: orps %xmm1, %xmm0
+; X32-NEXT: movlps %xmm0, (%esp)
; X32-NEXT: fldl (%esp)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
Modified: llvm/trunk/test/CodeGen/X86/var-permute-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/var-permute-128.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/var-permute-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/var-permute-128.ll Tue Jul 2 10:51:02 2019
@@ -436,15 +436,15 @@ define <4 x float> @var_shuffle_v4f32(<4
; SSE3-NEXT: movd %xmm1, %esi
; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE3-NEXT: andl $3, %eax
-; SSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE3-NEXT: andl $3, %ecx
-; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT: andl $3, %edx
-; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE3-NEXT: andl $3, %esi
+; SSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll Tue Jul 2 10:51:02 2019
@@ -73,16 +73,16 @@ define <4 x float> @var_shuffle_v4f32_v4
; SSE2-NEXT: # kill: def $esi killed $esi def $rsi
; SSE2-NEXT: # kill: def $edi killed $edi def $rdi
; SSE2-NEXT: andl $3, %edi
-; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: andl $3, %esi
-; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: andl $3, %edx
-; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: andl $3, %ecx
+; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
@@ -93,16 +93,16 @@ define <4 x float> @var_shuffle_v4f32_v4
; SSSE3-NEXT: # kill: def $esi killed $esi def $rsi
; SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
; SSSE3-NEXT: andl $3, %edi
-; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: andl $3, %esi
-; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: andl $3, %edx
-; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: andl $3, %ecx
+; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
@@ -113,11 +113,11 @@ define <4 x float> @var_shuffle_v4f32_v4
; SSE41-NEXT: # kill: def $esi killed $esi def $rsi
; SSE41-NEXT: # kill: def $edi killed $edi def $rdi
; SSE41-NEXT: andl $3, %edi
-; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: andl $3, %esi
; SSE41-NEXT: andl $3, %edx
+; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE41-NEXT: andl $3, %ecx
+; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
@@ -130,11 +130,11 @@ define <4 x float> @var_shuffle_v4f32_v4
; AVX-NEXT: # kill: def $esi killed $esi def $rsi
; AVX-NEXT: # kill: def $edi killed $edi def $rdi
; AVX-NEXT: andl $3, %edi
-; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: andl $3, %esi
; AVX-NEXT: andl $3, %edx
+; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: andl $3, %ecx
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
@@ -1222,10 +1222,10 @@ define <4 x float> @var_shuffle_v4f32_v4
; SSE-NEXT: andl $3, %edi
; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: andl $3, %edx
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: andl $3, %ecx
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -1239,10 +1239,10 @@ define <4 x float> @var_shuffle_v4f32_v4
; AVX-NEXT: andl $3, %edi
; AVX-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
; AVX-NEXT: andl $3, %edx
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: andl $3, %ecx
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll?rev=364948&r1=364947&r2=364948&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll Tue Jul 2 10:51:02 2019
@@ -14,15 +14,15 @@ define <4 x double> @var_shuffle_v4f64_v
; ALL-NEXT: andq $-32, %rsp
; ALL-NEXT: subq $64, %rsp
; ALL-NEXT: andl $3, %esi
+; ALL-NEXT: andl $3, %edi
; ALL-NEXT: andl $3, %ecx
; ALL-NEXT: andl $3, %edx
-; ALL-NEXT: andl $3, %edi
; ALL-NEXT: vmovaps %ymm0, (%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: retq
@@ -68,15 +68,15 @@ define <4 x double> @var_shuffle_v4f64_v
; ALL-LABEL: var_shuffle_v4f64_v2f64_xxxx_i64:
; ALL: # %bb.0:
; ALL-NEXT: andl $1, %esi
+; ALL-NEXT: andl $1, %edi
; ALL-NEXT: andl $1, %ecx
; ALL-NEXT: andl $1, %edx
-; ALL-NEXT: andl $1, %edi
; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
%x0 = extractelement <2 x double> %x, i64 %i0
%x1 = extractelement <2 x double> %x, i64 %i1
@@ -194,21 +194,21 @@ define <8 x float> @var_shuffle_v8f32_v8
; ALL-NEXT: movl 16(%rbp), %eax
; ALL-NEXT: andl $7, %eax
; ALL-NEXT: andl $7, %edi
-; ALL-NEXT: vmovaps %ymm0, (%rsp)
-; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: andl $7, %esi
; ALL-NEXT: andl $7, %edx
; ALL-NEXT: andl $7, %ecx
; ALL-NEXT: andl $7, %r8d
-; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; ALL-NEXT: vmovaps %ymm0, (%rsp)
; ALL-NEXT: andl $7, %r9d
-; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
-; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
-; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
-; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: retq
@@ -245,21 +245,21 @@ define <8 x float> @var_shuffle_v8f32_v4
; ALL-NEXT: movl {{[0-9]+}}(%rsp), %eax
; ALL-NEXT: andl $3, %eax
; ALL-NEXT: andl $3, %edi
-; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: andl $3, %esi
; ALL-NEXT: andl $3, %edx
; ALL-NEXT: andl $3, %ecx
; ALL-NEXT: andl $3, %r8d
-; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT: andl $3, %r9d
-; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
-; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
-; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
-; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
+; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
%x0 = extractelement <4 x float> %x, i32 %i0
%x1 = extractelement <4 x float> %x, i32 %i1
More information about the llvm-commits
mailing list