[llvm] r343858 - [X86][SSE] Try to make MOVLPS/MOVHPS(+PD) instructions SimplifyDemandedElts proof
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 5 08:50:18 PDT 2018
Author: rksimon
Date: Fri Oct 5 08:50:18 2018
New Revision: 343858
URL: http://llvm.org/viewvc/llvm-project?rev=343858&view=rev
Log:
[X86][SSE] Try to make MOVLPS/MOVHPS(+PD) instructions SimplifyDemandedElts proof
Fix for D52912, which was simplifying MOVLPS/MOVHPS(+PD) instructions because the tests were only touching one of the vector halves.
Modified:
llvm/trunk/test/CodeGen/X86/sse-schedule.ll
llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
Modified: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-schedule.ll?rev=343858&r1=343857&r2=343858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll Fri Oct 5 08:50:18 2018
@@ -2707,37 +2707,42 @@ define <4 x float> @test_movhlps(<4 x fl
; TODO (v)movhps
-define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
+define <4 x float> @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
; GENERIC-LABEL: test_movhps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; GENERIC-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movhps:
; ATOM: # %bb.0:
+; ATOM-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; ATOM-NEXT: addps %xmm1, %xmm2 # sched: [5:5.00]
+; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] sched: [1:1.00]
+; ATOM-NEXT: movlps %xmm2, (%rdi) # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movhps:
; SLM: # %bb.0:
; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
-; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; SLM-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_movhps:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; SANDY-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movhps:
@@ -2745,14 +2750,16 @@ define void @test_movhps(<4 x float> %a0
; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movhps:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; HASWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_movhps:
@@ -2760,14 +2767,16 @@ define void @test_movhps(<4 x float> %a0
; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_movhps:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movhps:
@@ -2775,14 +2784,16 @@ define void @test_movhps(<4 x float> %a0
; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_movhps:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movhps:
@@ -2790,14 +2801,16 @@ define void @test_movhps(<4 x float> %a0
; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_movhps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; SKX-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movhps:
@@ -2805,14 +2818,16 @@ define void @test_movhps(<4 x float> %a0
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movhps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
-; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movhps:
@@ -2820,14 +2835,16 @@ define void @test_movhps(<4 x float> %a0
; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [3:1.00]
+; BTVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movhps:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_movhps:
@@ -2835,6 +2852,7 @@ define void @test_movhps(<4 x float> %a0
; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = bitcast x86_mmx* %a2 to <2 x float>*
%2 = load <2 x float>, <2 x float> *%1, align 8
@@ -2843,7 +2861,7 @@ define void @test_movhps(<4 x float> %a0
%5 = fadd <4 x float> %a0, %4
%6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3>
store <2 x float> %6, <2 x float>* %1
- ret void
+ ret <4 x float> %4
}
; TODO (v)movlhps
@@ -2955,33 +2973,38 @@ define <4 x float> @test_movlhps(<4 x fl
ret <4 x float> %2
}
-define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
+define <4 x float> @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
; GENERIC-LABEL: test_movlps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movlps:
; ATOM: # %bb.0:
+; ATOM-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; ATOM-NEXT: addps %xmm1, %xmm2 # sched: [5:5.00]
+; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: movlps %xmm2, (%rdi) # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movlps:
; SLM: # %bb.0:
; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
-; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_movlps:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movlps:
@@ -2989,13 +3012,15 @@ define void @test_movlps(<4 x float> %a0
; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movlps:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_movlps:
@@ -3003,13 +3028,15 @@ define void @test_movlps(<4 x float> %a0
; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_movlps:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movlps:
@@ -3017,13 +3044,15 @@ define void @test_movlps(<4 x float> %a0
; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_movlps:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movlps:
@@ -3031,13 +3060,15 @@ define void @test_movlps(<4 x float> %a0
; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_movlps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movlps:
@@ -3045,13 +3076,15 @@ define void @test_movlps(<4 x float> %a0
; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movlps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movlps:
@@ -3059,13 +3092,15 @@ define void @test_movlps(<4 x float> %a0
; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movlps:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_movlps:
@@ -3073,6 +3108,7 @@ define void @test_movlps(<4 x float> %a0
; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = bitcast x86_mmx* %a2 to <2 x float>*
%2 = load <2 x float>, <2 x float> *%1, align 8
@@ -3081,7 +3117,7 @@ define void @test_movlps(<4 x float> %a0
%5 = fadd <4 x float> %a0, %4
%6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
store <2 x float> %6, <2 x float>* %1
- ret void
+ ret <4 x float> %4
}
define i32 @test_movmskps(<4 x float> %a0) {
Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=343858&r1=343857&r2=343858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Fri Oct 5 08:50:18 2018
@@ -4836,33 +4836,38 @@ define i64 @test_movd_64(<2 x i64> %a0,
ret i64 %7
}
-define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
+define <2 x double> @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; GENERIC-LABEL: test_movhpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movhpd:
; ATOM: # %bb.0:
+; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; ATOM-NEXT: addpd %xmm1, %xmm2 # sched: [6:3.00]
+; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: movhpd %xmm2, (%rdi) # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movhpd:
; SLM: # %bb.0:
; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
-; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_movhpd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movhpd:
@@ -4870,13 +4875,15 @@ define void @test_movhpd(<2 x double> %a
; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movhpd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_movhpd:
@@ -4884,13 +4891,15 @@ define void @test_movhpd(<2 x double> %a
; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_movhpd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movhpd:
@@ -4898,13 +4907,15 @@ define void @test_movhpd(<2 x double> %a
; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_movhpd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movhpd:
@@ -4912,13 +4923,15 @@ define void @test_movhpd(<2 x double> %a
; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_movhpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movhpd:
@@ -4926,13 +4939,15 @@ define void @test_movhpd(<2 x double> %a
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movhpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movhpd:
@@ -4940,13 +4955,15 @@ define void @test_movhpd(<2 x double> %a
; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movhpd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_movhpd:
@@ -4954,6 +4971,7 @@ define void @test_movhpd(<2 x double> %a
; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = bitcast x86_mmx* %a2 to double*
%2 = load double, double *%1, align 8
@@ -4961,36 +4979,41 @@ define void @test_movhpd(<2 x double> %a
%4 = fadd <2 x double> %a0, %3
%5 = extractelement <2 x double> %4, i32 1
store double %5, double* %1
- ret void
+ ret <2 x double> %3
}
-define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
+define <2 x double> @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; GENERIC-LABEL: test_movlpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movlpd:
; ATOM: # %bb.0:
+; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; ATOM-NEXT: addpd %xmm1, %xmm2 # sched: [6:3.00]
+; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: movlpd %xmm2, (%rdi) # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movlpd:
; SLM: # %bb.0:
; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
-; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_movlpd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movlpd:
@@ -4998,13 +5021,15 @@ define void @test_movlpd(<2 x double> %a
; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movlpd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_movlpd:
@@ -5012,13 +5037,15 @@ define void @test_movlpd(<2 x double> %a
; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_movlpd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movlpd:
@@ -5026,13 +5053,15 @@ define void @test_movlpd(<2 x double> %a
; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_movlpd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movlpd:
@@ -5040,13 +5069,15 @@ define void @test_movlpd(<2 x double> %a
; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_movlpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movlpd:
@@ -5054,13 +5085,15 @@ define void @test_movlpd(<2 x double> %a
; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movlpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movlpd:
@@ -5068,13 +5101,15 @@ define void @test_movlpd(<2 x double> %a
; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movlpd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_movlpd:
@@ -5082,6 +5117,7 @@ define void @test_movlpd(<2 x double> %a
; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = bitcast x86_mmx* %a2 to double*
%2 = load double, double *%1, align 8
@@ -5089,7 +5125,7 @@ define void @test_movlpd(<2 x double> %a
%4 = fadd <2 x double> %a0, %3
%5 = extractelement <2 x double> %4, i32 0
store double %5, double* %1
- ret void
+ ret <2 x double> %3
}
define i32 @test_movmskpd(<2 x double> %a0) {
More information about the llvm-commits
mailing list